From 143afb9253081edb720f861eaffa30e4a108220a Mon Sep 17 00:00:00 2001 From: Ryker Zhu Date: Wed, 29 Apr 2026 03:45:53 +0800 Subject: [PATCH 01/25] Fix clippy warnings tripped by rust-1.95.0 toolchain The toolchain bump to rust-1.95.0 introduced new lint defaults (collapsible_match, collapsible_if, explicit_counter_loop, unused_imports in test modules, etc.) that turned previously clean code into 145 hard errors under `cargo clippy --all-features -- -D warnings`. Mechanical fixes via `cargo clippy --fix --lib --tests --bins --examples` plus a small number of hand-edits where auto-fix wouldn't apply. No semantic changes. Surfaces this would have stayed buried indefinitely otherwise: the clippy pre-commit hook only fires on Rust file changes, and the broken state predated the most recent .rs commit. --- examples/read_office_files.rs | 30 +- soapberry-zip/src/office.rs | 1 - src/formula/omml/utils.rs | 41 +-- src/iwa/numbers/table_extractor.rs | 60 ++-- src/iwa/pages/document.rs | 2 +- src/odf/core/metadata.rs | 6 +- src/odf/core/xml.rs | 8 +- src/odf/datatype.rs | 4 +- src/odf/elements/attr_parser.rs | 4 +- src/odf/elements/table.rs | 328 +++++++++--------- src/odf/elements/text.rs | 56 ++- src/odf/odp/mutable.rs | 12 +- src/odf/odp/parser.rs | 8 +- src/odf/ods/parser.rs | 40 +-- src/odf/odt/document.rs | 47 ++- src/odf/odt/parser.rs | 2 +- src/ole/doc/document.rs | 2 +- src/ole/doc/footnote.rs | 2 +- src/ole/doc/parts/numbering.rs | 6 +- src/ole/doc/parts/pap.rs | 49 ++- src/ole/doc/writer/core.rs | 4 +- src/ole/doc/writer/headers.rs | 2 +- src/ole/doc/writer/hyperlinks.rs | 2 +- src/ole/doc/writer/sprm.rs | 2 +- src/ole/ppt/writer/core.rs | 9 +- src/ole/ppt/writer/env_data.rs | 14 +- src/ole/writer/core.rs | 12 +- src/ole/xls/shapes.rs | 7 +- src/ole/xls/workbook.rs | 14 +- src/ooxml/charts/reader.rs | 8 +- src/ooxml/docx/bookmark.rs | 47 ++- src/ooxml/docx/comment.rs | 68 ++-- src/ooxml/docx/content_control.rs | 26 +- src/ooxml/docx/document.rs | 24 +- 
src/ooxml/docx/field.rs | 20 +- src/ooxml/docx/footnote.rs | 112 +++--- src/ooxml/docx/header_footer.rs | 112 +++--- src/ooxml/docx/paragraph.rs | 64 ++-- src/ooxml/docx/parts/document_part.rs | 66 ++-- src/ooxml/docx/table.rs | 108 +++--- src/ooxml/docx/variables.rs | 36 +- src/ooxml/opc/part.rs | 34 +- src/ooxml/opc/pkgreader.rs | 88 ++--- src/ooxml/pptx/backgrounds.rs | 16 +- src/ooxml/pptx/customshow.rs | 35 +- src/ooxml/pptx/parts/comment.rs | 56 +-- src/ooxml/pptx/parts/presentation.rs | 118 ++++--- src/ooxml/pptx/parts/slide.rs | 91 +++-- src/ooxml/pptx/presentation.rs | 46 ++- src/ooxml/pptx/protection.rs | 87 +++-- src/ooxml/pptx/shapes/base.rs | 45 +-- src/ooxml/pptx/shapes/table.rs | 78 ++--- src/ooxml/pptx/shapes/textframe.rs | 44 +-- src/ooxml/pptx/slide.rs | 12 +- src/ooxml/xlsb/cells_reader.rs | 77 ++-- src/ooxml/xlsb/named_ranges.rs | 22 +- src/ooxml/xlsb/writer/shared_strings.rs | 2 +- src/ooxml/xlsx/workbook.rs | 12 +- src/ooxml/xlsx/worksheet.rs | 40 +-- src/rtf/types.rs | 4 +- src/sheet/eval/engine/math/random.rs | 8 +- .../eval/engine/statistical/distributions.rs | 4 +- src/sheet/workbook_types.rs | 2 +- 63 files changed, 1082 insertions(+), 1304 deletions(-) diff --git a/examples/read_office_files.rs b/examples/read_office_files.rs index 8a2a4c6..d0834f3 100644 --- a/examples/read_office_files.rs +++ b/examples/read_office_files.rs @@ -118,17 +118,17 @@ fn demo_docx_reading() -> Result<(), Box> { println!(" Found '{}' in {} paragraphs", search_term, matches.len()); // Table analysis - if doc.has_tables()? { - if let Some(table) = doc.table(0)? { - let rows = table.rows()?; - println!( - " First table: {}x{} (rows x cols)", - rows.len(), - rows.first() - .map(|r| r.cells().map(|c| c.len()).unwrap_or(0)) - .unwrap_or(0) - ); - } + if doc.has_tables()? + && let Some(table) = doc.table(0)? 
+ { + let rows = table.rows()?; + println!( + " First table: {}x{} (rows x cols)", + rows.len(), + rows.first() + .map(|r| r.cells().map(|c| c.len()).unwrap_or(0)) + .unwrap_or(0) + ); } // Metadata @@ -249,10 +249,10 @@ fn demo_pptx_reading() -> Result<(), Box> { println!(" Text preview: {}", preview); // Search in this slide - if let Ok(matches) = slide.find_text("important") { - if !matches.is_empty() { - println!(" Contains 'important' in {} shapes", matches.len()); - } + if let Ok(matches) = slide.find_text("important") + && !matches.is_empty() + { + println!(" Contains 'important' in {} shapes", matches.len()); } } diff --git a/soapberry-zip/src/office.rs b/soapberry-zip/src/office.rs index 0804f62..b9ddb42 100644 --- a/soapberry-zip/src/office.rs +++ b/soapberry-zip/src/office.rs @@ -597,7 +597,6 @@ const _: () = { #[cfg(test)] mod tests { use super::*; - use std::sync::atomic::{AtomicUsize, Ordering}; #[test] fn test_round_trip_stored() { diff --git a/src/formula/omml/utils.rs b/src/formula/omml/utils.rs index a2e1b41..3f0ee99 100644 --- a/src/formula/omml/utils.rs +++ b/src/formula/omml/utils.rs @@ -430,12 +430,10 @@ pub fn validate_math_nodes(nodes: &[super::MathNode]) -> Result<(), super::OmmlE )); } }, - super::MathNode::Root { base, .. } => { - if base.is_empty() { - return Err(super::OmmlError::MissingRequiredElement( - "Root base is empty".to_string(), - )); - } + super::MathNode::Root { base, .. } if base.is_empty() => { + return Err(super::OmmlError::MissingRequiredElement( + "Root base is empty".to_string(), + )); }, super::MathNode::Power { base, exponent } => { if base.is_empty() { @@ -473,12 +471,10 @@ pub fn validate_math_nodes(nodes: &[super::MathNode]) -> Result<(), super::OmmlE )); } }, - super::MathNode::Fenced { content, .. } => { - if content.is_empty() { - return Err(super::OmmlError::ValidationError( - "Fenced content is empty".to_string(), - )); - } + super::MathNode::Fenced { content, .. 
} if content.is_empty() => { + return Err(super::OmmlError::ValidationError( + "Fenced content is empty".to_string(), + )); }, super::MathNode::Matrix { rows, .. } => { if rows.is_empty() { @@ -509,28 +505,25 @@ pub fn validate_element_nesting( parent_type: Option<&ElementType>, ) -> Result<(), super::OmmlError> { match element_type { - ElementType::Math => { + ElementType::Math // Math element should be root or not have a parent - if parent_type.is_some() { + if parent_type.is_some() => { return Err(super::OmmlError::InvalidStructure( "Math element should be root".to_string(), )); - } - }, - ElementType::Numerator | ElementType::Denominator => { - if !matches!(parent_type, Some(ElementType::Fraction)) { + }, + ElementType::Numerator | ElementType::Denominator + if !matches!(parent_type, Some(ElementType::Fraction)) => { return Err(super::OmmlError::InvalidStructure( "Numerator/denominator must be inside fraction".to_string(), )); - } - }, - ElementType::Degree => { - if !matches!(parent_type, Some(ElementType::Radical)) { + }, + ElementType::Degree + if !matches!(parent_type, Some(ElementType::Radical)) => { return Err(super::OmmlError::InvalidStructure( "Degree must be inside radical".to_string(), )); - } - }, + }, ElementType::Base => { match parent_type { Some( diff --git a/src/iwa/numbers/table_extractor.rs b/src/iwa/numbers/table_extractor.rs index c81e56c..c6975c9 100644 --- a/src/iwa/numbers/table_extractor.rs +++ b/src/iwa/numbers/table_extractor.rs @@ -395,40 +395,30 @@ impl<'a> TableDataExtractor<'a> { match ast_node_type { // Arithmetic operators (binary) - AstNodeType::AdditionNode => { - if expr_stack.len() >= 2 { - let right = expr_stack.pop().unwrap(); - let left = expr_stack.pop().unwrap(); - expr_stack.push(format!("({}+{})", left, right)); - } + AstNodeType::AdditionNode if expr_stack.len() >= 2 => { + let right = expr_stack.pop().unwrap(); + let left = expr_stack.pop().unwrap(); + expr_stack.push(format!("({}+{})", left, right)); }, - 
AstNodeType::SubtractionNode => { - if expr_stack.len() >= 2 { - let right = expr_stack.pop().unwrap(); - let left = expr_stack.pop().unwrap(); - expr_stack.push(format!("({}-{})", left, right)); - } + AstNodeType::SubtractionNode if expr_stack.len() >= 2 => { + let right = expr_stack.pop().unwrap(); + let left = expr_stack.pop().unwrap(); + expr_stack.push(format!("({}-{})", left, right)); }, - AstNodeType::MultiplicationNode => { - if expr_stack.len() >= 2 { - let right = expr_stack.pop().unwrap(); - let left = expr_stack.pop().unwrap(); - expr_stack.push(format!("({}*{})", left, right)); - } + AstNodeType::MultiplicationNode if expr_stack.len() >= 2 => { + let right = expr_stack.pop().unwrap(); + let left = expr_stack.pop().unwrap(); + expr_stack.push(format!("({}*{})", left, right)); }, - AstNodeType::DivisionNode => { - if expr_stack.len() >= 2 { - let right = expr_stack.pop().unwrap(); - let left = expr_stack.pop().unwrap(); - expr_stack.push(format!("({}/{})", left, right)); - } + AstNodeType::DivisionNode if expr_stack.len() >= 2 => { + let right = expr_stack.pop().unwrap(); + let left = expr_stack.pop().unwrap(); + expr_stack.push(format!("({}/{})", left, right)); }, - AstNodeType::PowerNode => { - if expr_stack.len() >= 2 { - let right = expr_stack.pop().unwrap(); - let left = expr_stack.pop().unwrap(); - expr_stack.push(format!("({}^{})", left, right)); - } + AstNodeType::PowerNode if expr_stack.len() >= 2 => { + let right = expr_stack.pop().unwrap(); + let left = expr_stack.pop().unwrap(); + expr_stack.push(format!("({}^{})", left, right)); }, // Note: Comparison operators are handled differently in Numbers AST @@ -522,12 +512,10 @@ impl<'a> TableDataExtractor<'a> { }, // Concatenation - AstNodeType::ConcatenationNode => { - if expr_stack.len() >= 2 { - let right = expr_stack.pop().unwrap(); - let left = expr_stack.pop().unwrap(); - expr_stack.push(format!("({}&{})", left, right)); - } + AstNodeType::ConcatenationNode if expr_stack.len() >= 2 => { + let 
right = expr_stack.pop().unwrap(); + let left = expr_stack.pop().unwrap(); + expr_stack.push(format!("({}&{})", left, right)); }, // Other node types - handle gracefully diff --git a/src/iwa/pages/document.rs b/src/iwa/pages/document.rs index 057f16b..8928eeb 100644 --- a/src/iwa/pages/document.rs +++ b/src/iwa/pages/document.rs @@ -272,7 +272,7 @@ mod tests { ); let doc = doc_result.unwrap(); - assert!(doc.object_index.all_object_ids().len() > 0); + assert!(!doc.object_index.all_object_ids().is_empty()); } #[test] diff --git a/src/odf/core/metadata.rs b/src/odf/core/metadata.rs index bdc5a12..794e749 100644 --- a/src/odf/core/metadata.rs +++ b/src/odf/core/metadata.rs @@ -157,10 +157,8 @@ impl OdfMetadata { Ok(Event::Start(_)) => { depth += 1; }, - Ok(Event::Text(ref t)) => { - if depth == 0 { - content.push_str(&String::from_utf8(t.to_vec()).unwrap_or_default()); - } + Ok(Event::Text(ref t)) if depth == 0 => { + content.push_str(&String::from_utf8(t.to_vec()).unwrap_or_default()); }, Ok(Event::End(_)) => { if depth == 0 { diff --git a/src/odf/core/xml.rs b/src/odf/core/xml.rs index 31a88b5..313870d 100644 --- a/src/odf/core/xml.rs +++ b/src/odf/core/xml.rs @@ -81,11 +81,9 @@ impl Content { current_para_text.clear(); } }, - Ok(Event::Text(ref t)) => { - if in_paragraph { - let text_content = String::from_utf8(t.to_vec()).unwrap_or_default(); - current_para_text.push_str(&text_content); - } + Ok(Event::Text(ref t)) if in_paragraph => { + let text_content = String::from_utf8(t.to_vec()).unwrap_or_default(); + current_para_text.push_str(&text_content); }, Ok(Event::End(ref e)) => { // Copy the name bytes to avoid lifetime issues diff --git a/src/odf/datatype.rs b/src/odf/datatype.rs index 06b2d9f..a416eae 100644 --- a/src/odf/datatype.rs +++ b/src/odf/datatype.rs @@ -397,8 +397,8 @@ mod tests { #[test] fn test_boolean_decode() { - assert_eq!(Boolean::decode("true").unwrap(), true); - assert_eq!(Boolean::decode("false").unwrap(), false); + 
assert!(Boolean::decode("true").unwrap()); + assert!(!Boolean::decode("false").unwrap()); assert!(Boolean::decode("invalid").is_err()); assert!(Boolean::decode("TRUE").is_err()); assert!(Boolean::decode("1").is_err()); diff --git a/src/odf/elements/attr_parser.rs b/src/odf/elements/attr_parser.rs index 804e331..794cac1 100644 --- a/src/odf/elements/attr_parser.rs +++ b/src/odf/elements/attr_parser.rs @@ -553,8 +553,8 @@ mod tests { #[test] fn test_parse_bool() { - assert_eq!(parse_bool(b"true").unwrap(), true); - assert_eq!(parse_bool(b"false").unwrap(), false); + assert!(parse_bool(b"true").unwrap()); + assert!(!parse_bool(b"false").unwrap()); assert!(parse_bool(b"invalid").is_err()); } diff --git a/src/odf/elements/table.rs b/src/odf/elements/table.rs index aab71cc..af005be 100644 --- a/src/odf/elements/table.rs +++ b/src/odf/elements/table.rs @@ -535,6 +535,170 @@ impl From for Element { } } +/// Collection of table elements for easy parsing +pub struct TableElements; + +impl TableElements { + /// Parse all tables from document content (content.xml) + pub fn parse_tables_from_content(xml_content: &str) -> Result> { + Self::parse_tables(xml_content) + } + + /// Parse all tables from XML content + pub fn parse_tables(xml_content: &str) -> Result> { + let mut reader = quick_xml::Reader::from_str(xml_content); + let mut buf = Vec::new(); + let mut tables = Vec::new(); + let mut stack: Vec = Vec::new(); + + loop { + match reader.read_event_into(&mut buf) { + Ok(quick_xml::events::Event::Start(ref e)) => { + let tag_name = + String::from_utf8(e.name().as_ref().to_vec()).unwrap_or_default(); + + if tag_name == "table:table" { + let mut element = Element::new(&tag_name); + + // Parse attributes + for attr_result in e.attributes() { + if let Ok(attr) = attr_result + && let (Ok(key), Ok(value)) = ( + String::from_utf8(attr.key.as_ref().to_vec()), + String::from_utf8(attr.value.to_vec()), + ) + { + element.set_attribute(&key, &value); + } + } + + stack.push(element); + } 
else if !stack.is_empty() { + // Handle nested elements within table + let mut element = Element::new(&tag_name); + + // Parse attributes + for attr_result in e.attributes() { + if let Ok(attr) = attr_result + && let (Ok(key), Ok(value)) = ( + String::from_utf8(attr.key.as_ref().to_vec()), + String::from_utf8(attr.value.to_vec()), + ) + { + element.set_attribute(&key, &value); + } + } + + stack.push(element); + } + }, + Ok(quick_xml::events::Event::Text(ref t)) => { + if let Some(current) = stack.last_mut() + && let Ok(text) = String::from_utf8(t.to_vec()) + { + let current_text = current.text().to_string(); + current.set_text(&format!("{}{}", current_text, text)); + } + }, + Ok(quick_xml::events::Event::End(ref e)) => { + let tag_name = + String::from_utf8(e.name().as_ref().to_vec()).unwrap_or_default(); + + if tag_name == "table:table" { + if let Some(table_element) = stack.pop() + && let Ok(table) = Table::from_element(table_element) + { + tables.push(table); + } + } else if !stack.is_empty() { + let element = stack.pop().unwrap(); + if let Some(parent) = stack.last_mut() { + parent.add_child(element); + } + } + }, + Ok(quick_xml::events::Event::Eof) => break, + Err(_) => break, + _ => {}, + } + buf.clear(); + } + + Ok(tables) + } + + /// Parse table from XML content with proper handling of repeated cells + #[allow(dead_code)] + pub fn parse_table_with_expansion( + xml_content: &str, + table_name: Option<&str>, + ) -> Result> { + let tables = Self::parse_tables(xml_content)?; + + for table in tables { + if table_name.is_none() || table.name() == table_name { + // Expand repeated cells + let mut expanded_table = Table::new(); + if let Some(name) = table.name() { + expanded_table.set_name(name); + } + if let Some(style) = table.style_name() { + expanded_table.set_style_name(style); + } + + for row in table.rows()? 
{ + let mut expanded_row = TableRow::new(); + if let Some(style) = row.style_name() { + expanded_row.set_style_name(style); + } + + for cell in row.cells()? { + let repeated = cell + .element + .get_int_attribute("table:number-columns-repeated") + .map(|n| n as usize) + .unwrap_or(1); + + for _ in 0..repeated { + let mut new_cell = TableCell::new(); + new_cell.set_text(cell.text()?.as_str()); + + // Copy other attributes + if let Some(formula) = cell.formula() { + new_cell.set_formula(formula); + } + if let Some(style) = cell.style_name() { + new_cell.set_style_name(style); + } + if cell.colspan() > 1 { + new_cell.set_colspan(cell.colspan()); + } + if cell.rowspan() > 1 { + new_cell.set_rowspan(cell.rowspan()); + } + + // Copy value attributes + for (key, value) in cell.element.attributes() { + if key.starts_with("office:") { + new_cell.element.set_attribute(key, value); + } + } + + expanded_row.add_cell(new_cell); + } + } + + expanded_table.add_row(expanded_row); + } + + return Ok(Some(expanded_table)); + } + } + + Ok(None) + } +} + #[cfg(test)] mod tests { use super::*; @@ -1013,167 +1177,3 @@ mod tests { assert_eq!(col2.repeated(), 5); } } - -/// Collection of table elements for easy parsing -pub struct TableElements; - -impl TableElements { - /// Parse all tables from document content (content.xml) - pub fn parse_tables_from_content(xml_content: &str) -> Result> { - Self::parse_tables(xml_content) - } - - /// Parse all tables from XML content - pub fn parse_tables(xml_content: &str) -> Result> { - let mut reader = quick_xml::Reader::from_str(xml_content); - let mut buf = Vec::new(); - let mut tables = Vec::new(); - let mut stack: Vec = Vec::new(); - - loop { - match reader.read_event_into(&mut buf) { - Ok(quick_xml::events::Event::Start(ref e)) => { - let tag_name = - String::from_utf8(e.name().as_ref().to_vec()).unwrap_or_default(); - - if tag_name == "table:table" { - let mut element = Element::new(&tag_name); - - // Parse attributes - for attr_result in 
e.attributes() { - if let Ok(attr) = attr_result - && let (Ok(key), Ok(value)) = ( - String::from_utf8(attr.key.as_ref().to_vec()), - String::from_utf8(attr.value.to_vec()), - ) - { - element.set_attribute(&key, &value); - } - } - - stack.push(element); - } else if !stack.is_empty() { - // Handle nested elements within table - let mut element = Element::new(&tag_name); - - // Parse attributes - for attr_result in e.attributes() { - if let Ok(attr) = attr_result - && let (Ok(key), Ok(value)) = ( - String::from_utf8(attr.key.as_ref().to_vec()), - String::from_utf8(attr.value.to_vec()), - ) - { - element.set_attribute(&key, &value); - } - } - - stack.push(element); - } - }, - Ok(quick_xml::events::Event::Text(ref t)) => { - if let Some(current) = stack.last_mut() - && let Ok(text) = String::from_utf8(t.to_vec()) - { - let current_text = current.text().to_string(); - current.set_text(&format!("{}{}", current_text, text)); - } - }, - Ok(quick_xml::events::Event::End(ref e)) => { - let tag_name = - String::from_utf8(e.name().as_ref().to_vec()).unwrap_or_default(); - - if tag_name == "table:table" { - if let Some(table_element) = stack.pop() - && let Ok(table) = Table::from_element(table_element) - { - tables.push(table); - } - } else if !stack.is_empty() { - let element = stack.pop().unwrap(); - if let Some(parent) = stack.last_mut() { - parent.add_child(element); - } - } - }, - Ok(quick_xml::events::Event::Eof) => break, - Err(_) => break, - _ => {}, - } - buf.clear(); - } - - Ok(tables) - } - - /// Parse table from XML content with proper handling of repeated cells - #[allow(dead_code)] - pub fn parse_table_with_expansion( - xml_content: &str, - table_name: Option<&str>, - ) -> Result> { - let tables = Self::parse_tables(xml_content)?; - - for table in tables { - if table_name.is_none() || table.name() == table_name { - // Expand repeated cells - let mut expanded_table = Table::new(); - if let Some(name) = table.name() { - expanded_table.set_name(name); - } - if let 
Some(style) = table.style_name() { - expanded_table.set_style_name(style); - } - - for row in table.rows()? { - let mut expanded_row = TableRow::new(); - if let Some(style) = row.style_name() { - expanded_row.set_style_name(style); - } - - for cell in row.cells()? { - let repeated = cell - .element - .get_int_attribute("table:number-columns-repeated") - .map(|n| n as usize) - .unwrap_or(1); - - for _ in 0..repeated { - let mut new_cell = TableCell::new(); - new_cell.set_text(cell.text()?.as_str()); - - // Copy other attributes - if let Some(formula) = cell.formula() { - new_cell.set_formula(formula); - } - if let Some(style) = cell.style_name() { - new_cell.set_style_name(style); - } - if cell.colspan() > 1 { - new_cell.set_colspan(cell.colspan()); - } - if cell.rowspan() > 1 { - new_cell.set_rowspan(cell.rowspan()); - } - - // Copy value attributes - for (key, value) in cell.element.attributes() { - if key.starts_with("office:") { - new_cell.element.set_attribute(key, value); - } - } - - expanded_row.add_cell(new_cell); - } - } - - expanded_table.add_row(expanded_row); - } - - return Ok(Some(expanded_table)); - } - } - - Ok(None) - } -} diff --git a/src/odf/elements/text.rs b/src/odf/elements/text.rs index 9968ffb..2fdd34f 100644 --- a/src/odf/elements/text.rs +++ b/src/odf/elements/text.rs @@ -719,16 +719,15 @@ impl TextElements { // Sections are transparent containers, just continue }, // Text boxes and frames - "draw:text-box" => { + "draw:text-box" // Text boxes should contribute their text content - if !paragraph_text.is_empty() { + if !paragraph_text.is_empty() => { if !text.is_empty() { text.push('\n'); } text.push_str(¶graph_text); paragraph_text.clear(); - } - }, + }, // Annotations (comments) "office:annotation" => { // Optionally include annotations @@ -736,19 +735,17 @@ impl TextElements { // skip_depth = 1; }, // Line breaks and formatting - "text:line-break" => { - if in_text_context { + "text:line-break" + if in_text_context => { 
paragraph_text.push('\n'); - } - }, - "text:tab" => { - if in_text_context { + }, + "text:tab" + if in_text_context => { paragraph_text.push('\t'); - } - }, - "text:s" => { + }, + "text:s" // Repeated space element - if in_text_context { + if in_text_context => { // Get the count attribute (defaults to 1) let count = e .attributes() @@ -768,18 +765,16 @@ impl TextElements { for _ in 0..count { paragraph_text.push(' '); } - } - }, - "text:soft-page-break" => { + }, + "text:soft-page-break" // Soft page breaks can be treated as paragraph breaks - if in_text_context && !paragraph_text.is_empty() { + if in_text_context && !paragraph_text.is_empty() => { if !text.is_empty() { text.push('\n'); } text.push_str(¶graph_text); paragraph_text.clear(); - } - }, + }, _ => {}, // Ignore other elements } }, @@ -793,19 +788,17 @@ impl TextElements { } match tag_name.as_str() { - "text:line-break" => { - if in_text_context { + "text:line-break" + if in_text_context => { paragraph_text.push('\n'); - } - }, - "text:tab" => { - if in_text_context { + }, + "text:tab" + if in_text_context => { paragraph_text.push('\t'); - } - }, - "text:s" => { + }, + "text:s" // Repeated space element - if in_text_context { + if in_text_context => { let count = e .attributes() .find_map(|attr| { @@ -824,8 +817,7 @@ impl TextElements { for _ in 0..count { paragraph_text.push(' '); } - } - }, + }, _ => {}, } }, diff --git a/src/odf/odp/mutable.rs b/src/odf/odp/mutable.rs index 5c435f3..8fb0b6b 100644 --- a/src/odf/odp/mutable.rs +++ b/src/odf/odp/mutable.rs @@ -451,11 +451,12 @@ impl MutablePresentation { let style_name = shape.style_name.as_deref().unwrap_or("gr3"); match shape.shape_type { - ShapeType::TextBox | ShapeType::AutoShape | ShapeType::Placeholder => { - if shape.has_text() { - let escaped_name = escape_xml(name); - let escaped_shape_text = escape_xml(&shape.text); - body.push_str(&xml_minifier::minified_xml_format!( + ShapeType::TextBox | ShapeType::AutoShape | ShapeType::Placeholder + if 
shape.has_text() => + { + let escaped_name = escape_xml(name); + let escaped_shape_text = escape_xml(&shape.text); + body.push_str(&xml_minifier::minified_xml_format!( r#"{}"#, escaped_name, style_name, @@ -465,7 +466,6 @@ impl MutablePresentation { height, escaped_shape_text )); - } }, _ => {}, } diff --git a/src/odf/odp/parser.rs b/src/odf/odp/parser.rs index 0ea0624..c7d6ab7 100644 --- a/src/odf/odp/parser.rs +++ b/src/odf/odp/parser.rs @@ -149,10 +149,8 @@ impl OdpParser { shape_depth += 1; } }, - b"draw:text-box" => { - if current_shape.is_some() { - in_text_box = true; - } + b"draw:text-box" if current_shape.is_some() => { + in_text_box = true; }, b"text:p" | b"text:span" => { // Text will be collected in Text event @@ -443,7 +441,7 @@ mod tests { #[test] fn test_shape_type_clone() { let t1 = ShapeType::Placeholder; - let t2 = t1.clone(); + let t2 = t1; assert_eq!(t1, t2); } diff --git a/src/odf/ods/parser.rs b/src/odf/ods/parser.rs index be17fcc..53b2289 100644 --- a/src/odf/ods/parser.rs +++ b/src/odf/ods/parser.rs @@ -32,40 +32,30 @@ impl OdsParser { let name = Self::extract_table_name(e)?; current_sheet = Some(SheetBuilder::new(name)); }, - b"table:table-row" => { - if current_sheet.is_some() { - current_row = Some(RowBuilder::new()); - } + b"table:table-row" if current_sheet.is_some() => { + current_row = Some(RowBuilder::new()); }, - b"table:table-cell" => { - if current_row.is_some() { - let cell_builder = Self::parse_cell_attributes(e)?; - current_cell = Some(cell_builder); - text_content.clear(); - } + b"table:table-cell" if current_row.is_some() => { + let cell_builder = Self::parse_cell_attributes(e)?; + current_cell = Some(cell_builder); + text_content.clear(); }, - b"text:p" | b"text:span" => { - if current_cell.is_some() { - in_text_element = true; - if e.name().as_ref() == b"text:p" { - text_content.clear(); - } + b"text:p" | b"text:span" if current_cell.is_some() => { + in_text_element = true; + if e.name().as_ref() == b"text:p" { + 
text_content.clear(); } }, _ => {}, }, - Ok(Event::Text(ref t)) => { - if in_text_element && current_cell.is_some() { - let text = String::from_utf8(t.to_vec()).unwrap_or_default(); - text_content.push_str(&text); - } + Ok(Event::Text(ref t)) if in_text_element && current_cell.is_some() => { + let text = String::from_utf8(t.to_vec()).unwrap_or_default(); + text_content.push_str(&text); }, Ok(Event::End(ref e)) => { match e.name().as_ref() { - b"text:p" | b"text:span" => { - if in_text_element { - in_text_element = false; - } + b"text:p" | b"text:span" if in_text_element => { + in_text_element = false; }, b"table:table-cell" => { if let Some(cell_builder) = current_cell.take() { diff --git a/src/odf/odt/document.rs b/src/odf/odt/document.rs index daff668..ca74dc6 100644 --- a/src/odf/odt/document.rs +++ b/src/odf/odt/document.rs @@ -789,21 +789,20 @@ impl Document { loop { match reader.read_event_into(&mut buf) { - Ok(Event::Empty(ref e)) | Ok(Event::Start(ref e)) => { - if e.name().as_ref() == b"text:bookmark" + Ok(Event::Empty(ref e)) | Ok(Event::Start(ref e)) + if (e.name().as_ref() == b"text:bookmark" || e.name().as_ref() == b"text:bookmark-start" - || e.name().as_ref() == b"text:bookmark-end" - { - // Extract name attribute - for attr in e.attributes().filter_map(|a| a.ok()) { - if attr.key.as_ref() == b"text:name" { - if let Ok(name) = String::from_utf8(attr.value.to_vec()) - && !bookmarks.contains(&name) - { - bookmarks.push(name); - } - break; + || e.name().as_ref() == b"text:bookmark-end") => + { + // Extract name attribute + for attr in e.attributes().filter_map(|a| a.ok()) { + if attr.key.as_ref() == b"text:name" { + if let Ok(name) = String::from_utf8(attr.value.to_vec()) + && !bookmarks.contains(&name) + { + bookmarks.push(name); } + break; } } }, @@ -848,17 +847,17 @@ impl Document { loop { match reader.read_event_into(&mut buf) { - Ok(Event::Empty(ref e)) | Ok(Event::Start(ref e)) => { - if e.name().as_ref() == b"draw:image" { - // Extract href 
attribute - if let Some(href) = e - .attributes() - .filter_map(|a| a.ok()) - .find(|attr| attr.key.as_ref() == b"xlink:href") - .and_then(|attr| String::from_utf8(attr.value.to_vec()).ok()) - { - images.push(href); - } + Ok(Event::Empty(ref e)) | Ok(Event::Start(ref e)) + if e.name().as_ref() == b"draw:image" => + { + // Extract href attribute + if let Some(href) = e + .attributes() + .filter_map(|a| a.ok()) + .find(|attr| attr.key.as_ref() == b"xlink:href") + .and_then(|attr| String::from_utf8(attr.value.to_vec()).ok()) + { + images.push(href); } }, Ok(Event::Eof) => break, diff --git a/src/odf/odt/parser.rs b/src/odf/odt/parser.rs index cbd9bdf..198fce8 100644 --- a/src/odf/odt/parser.rs +++ b/src/odf/odt/parser.rs @@ -605,7 +605,7 @@ mod tests { #[test] fn test_change_type_clone() { let t1 = ChangeType::Insertion; - let t2 = t1.clone(); + let t2 = t1; assert_eq!(t1, t2); } diff --git a/src/ole/doc/document.rs b/src/ole/doc/document.rs index 78b82b1..98222ce 100644 --- a/src/ole/doc/document.rs +++ b/src/ole/doc/document.rs @@ -453,7 +453,7 @@ impl Document { if let Some(plcf) = PlcfParser::parse(&pap_data[..pap_len], 4) { // PLCF count represents the number of paragraph boundaries // The actual paragraph count is the number of intervals - return Ok(plcf.count().saturating_sub(1).max(0)); + return Ok(plcf.count().saturating_sub(1)); } } diff --git a/src/ole/doc/footnote.rs b/src/ole/doc/footnote.rs index d0ba07f..b1702dd 100644 --- a/src/ole/doc/footnote.rs +++ b/src/ole/doc/footnote.rs @@ -172,7 +172,7 @@ mod tests { #[test] fn test_multiple_footnotes() { - let footnotes = vec![ + let footnotes = [ Footnote::new(10, 1, "First".to_string()), Footnote::new(50, 2, "Second".to_string()), Footnote::new(100, 3, "Third".to_string()), diff --git a/src/ole/doc/parts/numbering.rs b/src/ole/doc/parts/numbering.rs index 1c49435..03c05b9 100644 --- a/src/ole/doc/parts/numbering.rs +++ b/src/ole/doc/parts/numbering.rs @@ -456,7 +456,7 @@ mod tests { #[test] fn 
test_number_format_clone() { let fmt = NumberFormat::Bullet; - let cloned = fmt.clone(); + let cloned = fmt; assert_eq!(fmt, cloned); } @@ -489,7 +489,7 @@ mod tests { #[test] fn test_list_alignment_clone() { let align = ListAlignment::Center; - let cloned = align.clone(); + let cloned = align; assert_eq!(align, cloned); } @@ -876,7 +876,7 @@ mod tests { #[test] fn test_utf16le_ext_multiple_chars() { // "ABC" in UTF-16LE - let bytes = vec!['A' as u16, 'B' as u16, 'C' as u16] + let bytes = ['A' as u16, 'B' as u16, 'C' as u16] .iter() .flat_map(|c| c.to_le_bytes()) .collect::>(); diff --git a/src/ole/doc/parts/pap.rs b/src/ole/doc/parts/pap.rs index 6baf2f3..8bf1e53 100644 --- a/src/ole/doc/parts/pap.rs +++ b/src/ole/doc/parts/pap.rs @@ -523,42 +523,36 @@ impl ParagraphProperties { } }, // Operation 0x24: sprmPBrcTop - Top border - 0x24 => { + 0x24 // Parse BorderCode structure (4 bytes) - if sprm.operand.len() >= 4 { + if sprm.operand.len() >= 4 => { pap.borders.top = Self::parse_border(&sprm.operand); - } - }, + }, // Operation 0x25: sprmPBrcLeft - Left border - 0x25 => { - if sprm.operand.len() >= 4 { + 0x25 + if sprm.operand.len() >= 4 => { pap.borders.left = Self::parse_border(&sprm.operand); - } - }, + }, // Operation 0x26: sprmPBrcBottom - Bottom border - 0x26 => { - if sprm.operand.len() >= 4 { + 0x26 + if sprm.operand.len() >= 4 => { pap.borders.bottom = Self::parse_border(&sprm.operand); - } - }, + }, // Operation 0x27: sprmPBrcRight - Right border - 0x27 => { - if sprm.operand.len() >= 4 { + 0x27 + if sprm.operand.len() >= 4 => { pap.borders.right = Self::parse_border(&sprm.operand); - } - }, + }, // Operation 0x28: sprmPBrcBetween - Between border - 0x28 => { - if sprm.operand.len() >= 4 { + 0x28 + if sprm.operand.len() >= 4 => { pap.borders.between = Self::parse_border(&sprm.operand); - } - }, + }, // Operation 0x29: sprmPBrcBar - Bar border - 0x29 => { - if sprm.operand.len() >= 4 { + 0x29 + if sprm.operand.len() >= 4 => { pap.borders.bar = 
Self::parse_border(&sprm.operand); - } - }, + }, // Operation 0x2A: sprmPFNoAutoHyph - No auto hyphenation 0x2A => { if let Some(val) = sprm.operand_byte() { @@ -722,12 +716,11 @@ impl ParagraphProperties { } }, // Operation 0x4D: sprmPShd - Shading (Word 2002+) - 0x4D => { + 0x4D // Parse ShadingDescriptor structure - if sprm.operand.len() >= 10 { + if sprm.operand.len() >= 10 => { pap.shading = Self::parse_shading_descriptor(&sprm.operand); - } - }, + }, // Operations 0x4E-0x53: Borders v80 0x4E..=0x53 => { // BrcXXX80 - Word 97-2000 borders diff --git a/src/ole/doc/writer/core.rs b/src/ole/doc/writer/core.rs index bff8488..90e1a47 100644 --- a/src/ole/doc/writer/core.rs +++ b/src/ole/doc/writer/core.rs @@ -2254,7 +2254,7 @@ mod tests { let mut cursor = Cursor::new(Vec::new()); let result = writer.write_to(&mut cursor); assert!(result.is_ok()); - assert!(cursor.into_inner().len() > 0); + assert!(!cursor.into_inner().is_empty()); } #[test] @@ -2264,7 +2264,7 @@ mod tests { let result = writer.write_to(&mut cursor); assert!(result.is_ok()); let data = cursor.into_inner(); - assert!(data.len() > 0); + assert!(!data.is_empty()); } #[test] diff --git a/src/ole/doc/writer/headers.rs b/src/ole/doc/writer/headers.rs index 6fdc452..9d4a43b 100644 --- a/src/ole/doc/writer/headers.rs +++ b/src/ole/doc/writer/headers.rs @@ -124,7 +124,7 @@ mod tests { #[test] fn test_header_footer_type_variants() { // Test all HeaderFooterType variants exist and are distinct - let types = vec![ + let types = [ HeaderFooterType::FirstPageHeader, HeaderFooterType::OddPageHeader, HeaderFooterType::EvenPageHeader, diff --git a/src/ole/doc/writer/hyperlinks.rs b/src/ole/doc/writer/hyperlinks.rs index e2eb4e4..384ca82 100644 --- a/src/ole/doc/writer/hyperlinks.rs +++ b/src/ole/doc/writer/hyperlinks.rs @@ -132,7 +132,7 @@ mod tests { #[test] fn test_hyperlink_type_variants() { // Test all hyperlink types are distinct - let types = vec![ + let types = [ HyperlinkType::Url, HyperlinkType::Email, 
HyperlinkType::File, diff --git a/src/ole/doc/writer/sprm.rs b/src/ole/doc/writer/sprm.rs index 74234c1..274eae3 100644 --- a/src/ole/doc/writer/sprm.rs +++ b/src/ole/doc/writer/sprm.rs @@ -208,7 +208,7 @@ mod tests { builder.add_byte(chp::UNDERLINE, 1); let sprms = builder.build(); - assert!(sprms.len() > 0); + assert!(!sprms.is_empty()); } #[test] diff --git a/src/ole/ppt/writer/core.rs b/src/ole/ppt/writer/core.rs index 4647a2b..fbe0ac6 100644 --- a/src/ole/ppt/writer/core.rs +++ b/src/ole/ppt/writer/core.rs @@ -2022,8 +2022,7 @@ mod tests { #[test] fn test_ppt_write_error_display() { - let io_err = - PptWriteError::Io(std::io::Error::new(std::io::ErrorKind::Other, "test error")); + let io_err = PptWriteError::Io(std::io::Error::other("test error")); let err_str = format!("{}", io_err); assert!(err_str.contains("I/O error")); @@ -2082,7 +2081,7 @@ mod tests { let mut buffer = Cursor::new(Vec::new()); let result = writer.write_to(&mut buffer); assert!(result.is_ok()); - assert!(buffer.get_ref().len() > 0); + assert!(!buffer.get_ref().is_empty()); } #[test] @@ -2091,7 +2090,7 @@ mod tests { let mut buffer = Cursor::new(Vec::new()); let result = writer.write_to(&mut buffer); assert!(result.is_ok()); - assert!(buffer.get_ref().len() > 0); + assert!(!buffer.get_ref().is_empty()); } #[test] @@ -2112,7 +2111,7 @@ mod tests { let mut buffer = Cursor::new(Vec::new()); let result = writer.write_to(&mut buffer); assert!(result.is_ok()); - assert!(buffer.get_ref().len() > 0); + assert!(!buffer.get_ref().is_empty()); } #[test] diff --git a/src/ole/ppt/writer/env_data.rs b/src/ole/ppt/writer/env_data.rs index c81da39..59fb73e 100644 --- a/src/ole/ppt/writer/env_data.rs +++ b/src/ole/ppt/writer/env_data.rs @@ -588,14 +588,14 @@ mod tests { #[test] fn test_sr_kinsoku_atom_clone() { let atom = SrKinsokuAtom { kinsoku_type: 2 }; - let cloned = atom.clone(); + let cloned = atom; assert_eq!(atom.kinsoku_type, cloned.kinsoku_type); } #[test] fn test_tx_cf_style_atom_clone() { let 
atom = TxCFStyleAtom::DEFAULT; - let cloned = atom.clone(); + let cloned = atom; assert_eq!(atom.cf_mask, cloned.cf_mask); assert_eq!(atom.font_ref, cloned.font_ref); } @@ -603,7 +603,7 @@ mod tests { #[test] fn test_tx_pf_style_atom_clone() { let atom = TxPFStyleAtom::DEFAULT; - let cloned = atom.clone(); + let cloned = atom; assert_eq!(atom.pf_mask, cloned.pf_mask); assert_eq!(atom.bullet_char, cloned.bullet_char); } @@ -611,7 +611,7 @@ mod tests { #[test] fn test_tx_si_style_atom_clone() { let atom = TxSIStyleAtom::DEFAULT; - let cloned = atom.clone(); + let cloned = atom; assert_eq!(atom.lang, cloned.lang); assert_eq!(atom.alt_lang, cloned.alt_lang); } @@ -619,7 +619,7 @@ mod tests { #[test] fn test_sheet_properties_atom_clone() { let atom = SheetPropertiesAtom::DEFAULT; - let cloned = atom.clone(); + let cloned = atom; assert_eq!(atom.creation_time, cloned.creation_time); assert_eq!(atom.flags, cloned.flags); } @@ -627,7 +627,7 @@ mod tests { #[test] fn test_slide_view_info_atom_clone() { let atom = SlideViewInfoAtom::DEFAULT; - let cloned = atom.clone(); + let cloned = atom; assert_eq!(atom.snap_to_grid, cloned.snap_to_grid); assert_eq!(atom.snap_to_shape, cloned.snap_to_shape); } @@ -635,7 +635,7 @@ mod tests { #[test] fn test_vba_info_atom_clone() { let atom = VBAInfoAtom::DEFAULT; - let cloned = atom.clone(); + let cloned = atom; assert_eq!(atom.persist_id_ref, cloned.persist_id_ref); assert_eq!(atom.flags, cloned.flags); } diff --git a/src/ole/writer/core.rs b/src/ole/writer/core.rs index 54d3e3b..de4d487 100644 --- a/src/ole/writer/core.rs +++ b/src/ole/writer/core.rs @@ -584,13 +584,13 @@ impl OleWriter { // Write MiniFAT sectors (if any) if !minifat.is_empty() && minifat_start_sector != ENDOFCHAIN { let minifat_sectors = minifat.generate_minifat_sectors(self.sector_size); - let mut current_sector = minifat_start_sector; - for minifat_sector_data in &minifat_sectors { + for (current_sector, minifat_sector_data) in + 
(minifat_start_sector..).zip(minifat_sectors.iter()) + { let position = ((current_sector as u64) + 1) * (self.sector_size as u64); writer.seek(SeekFrom::Start(position))?; writer.write_all(minifat_sector_data)?; - current_sector += 1; } } @@ -604,12 +604,12 @@ impl OleWriter { // Write DIFAT sectors (if any) if !difat_sectors.is_empty() { - let mut current_sector = difat_start_sector; - for difat_sector_data in &difat_sectors { + for (current_sector, difat_sector_data) in + (difat_start_sector..).zip(difat_sectors.iter()) + { let position = ((current_sector as u64) + 1) * (self.sector_size as u64); writer.seek(SeekFrom::Start(position))?; writer.write_all(difat_sector_data)?; - current_sector += 1; } } diff --git a/src/ole/xls/shapes.rs b/src/ole/xls/shapes.rs index 344c8f9..affb998 100644 --- a/src/ole/xls/shapes.rs +++ b/src/ole/xls/shapes.rs @@ -62,13 +62,12 @@ pub fn extract_shapes_from_workbook(workbook_data: &[u8]) -> std::io::Result { + 0x00EB // Parse drawing group data if needed - if !record.data.is_empty() { + if !record.data.is_empty() => { let shapes = EscherShapeFactory::extract_shapes_from_drawing(&record.data)?; all_shapes.extend(shapes.iter().map(XlsShape::from_escher)); - } - }, + }, _ => {}, } } diff --git a/src/ole/xls/workbook.rs b/src/ole/xls/workbook.rs index 0afe52f..b87270d 100644 --- a/src/ole/xls/workbook.rs +++ b/src/ole/xls/workbook.rs @@ -131,20 +131,18 @@ impl XlsWorkbook { self.biff_version = bof.version; self.is_1904_date_system = bof.is_1904_date_system; }, - 0x0042 => { + 0x0042 // CodePage - if record.data.len() >= 2 { + if record.data.len() >= 2 => { let codepage = crate::common::binary::read_u16_le_at(&record.data, 0)?; *encoding = XlsEncoding::from_codepage(codepage)?; - } - }, - 0x0022 => { + }, + 0x0022 // Date1904 - if record.data.len() >= 2 { + if record.data.len() >= 2 => { let flag = crate::common::binary::read_u16_le_at(&record.data, 0)?; self.is_1904_date_system = flag == 1; - } - }, + }, 0x0085 => { // BoundSheet8 let 
sheet = BoundSheetRecord::parse(&record.data, encoding)?; diff --git a/src/ooxml/charts/reader.rs b/src/ooxml/charts/reader.rs index 48bf277..596f21c 100644 --- a/src/ooxml/charts/reader.rs +++ b/src/ooxml/charts/reader.rs @@ -166,10 +166,10 @@ fn parse_wall_floor(reader: &mut Reader) -> Result { loop { match reader.read_event_into(&mut buf) { - Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => { - if e.local_name().as_ref() == b"c:thickness" { - wall_floor.thickness = parse_u32_attr(e, b"val"); - } + Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) + if e.local_name().as_ref() == b"c:thickness" => + { + wall_floor.thickness = parse_u32_attr(e, b"val"); }, Ok(Event::End(ref e)) => { let tag_name = e.local_name(); diff --git a/src/ooxml/docx/bookmark.rs b/src/ooxml/docx/bookmark.rs index 192fae3..6482f29 100644 --- a/src/ooxml/docx/bookmark.rs +++ b/src/ooxml/docx/bookmark.rs @@ -72,33 +72,32 @@ impl Bookmark { loop { match reader.read_event() { - Ok(Event::Empty(e)) | Ok(Event::Start(e)) => { - if e.local_name().as_ref() == b"bookmarkStart" { - let mut id: Option = None; - let mut name = String::new(); + Ok(Event::Empty(e)) | Ok(Event::Start(e)) + if e.local_name().as_ref() == b"bookmarkStart" => + { + let mut id: Option = None; + let mut name = String::new(); - // Parse attributes - for attr in e.attributes().flatten() { - match attr.key.local_name().as_ref() { - b"id" => { - let id_str = String::from_utf8_lossy(&attr.value); - id = atoi_simd::parse::(id_str.as_bytes()) - .ok(); - }, - b"name" => { - name = String::from_utf8_lossy(&attr.value).into_owned(); - }, - _ => {}, - } + // Parse attributes + for attr in e.attributes().flatten() { + match attr.key.local_name().as_ref() { + b"id" => { + let id_str = String::from_utf8_lossy(&attr.value); + id = atoi_simd::parse::(id_str.as_bytes()).ok(); + }, + b"name" => { + name = String::from_utf8_lossy(&attr.value).into_owned(); + }, + _ => {}, } + } - // Skip system bookmarks (starting with _) - if let 
Some(bookmark_id) = id - && !name.is_empty() - && !name.starts_with('_') - { - bookmarks.push(Bookmark::new(bookmark_id, name)); - } + // Skip system bookmarks (starting with _) + if let Some(bookmark_id) = id + && !name.is_empty() + && !name.starts_with('_') + { + bookmarks.push(Bookmark::new(bookmark_id, name)); } }, Ok(Event::Eof) => break, diff --git a/src/ooxml/docx/comment.rs b/src/ooxml/docx/comment.rs index f667e35..37fffa8 100644 --- a/src/ooxml/docx/comment.rs +++ b/src/ooxml/docx/comment.rs @@ -120,19 +120,15 @@ impl Comment { loop { match reader.read_event() { - Ok(Event::Start(e)) | Ok(Event::Empty(e)) => { - if e.local_name().as_ref() == b"t" { - in_text_element = true; - } + Ok(Event::Start(e)) | Ok(Event::Empty(e)) if e.local_name().as_ref() == b"t" => { + in_text_element = true; }, Ok(Event::Text(e)) if in_text_element => { let text = unsafe { std::str::from_utf8_unchecked(e.as_ref()) }; result.push_str(text); }, - Ok(Event::End(e)) => { - if e.local_name().as_ref() == b"t" { - in_text_element = false; - } + Ok(Event::End(e)) if e.local_name().as_ref() == b"t" => { + in_text_element = false; }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), @@ -219,41 +215,37 @@ impl Comment { current_comment_xml.extend_from_slice(b">"); } }, - Ok(Event::End(e)) => { - if in_comment { - current_comment_xml.extend_from_slice(b""); + Ok(Event::End(e)) if in_comment => { + current_comment_xml.extend_from_slice(b""); - if e.local_name().as_ref() == b"comment" && depth == 1 { - if let Some(id) = current_id { - comments.push(Comment::new( - id, - current_author.clone(), - current_initials.clone(), - current_date.clone(), - current_comment_xml.clone(), - )); - } - in_comment = false; - } else { - depth -= 1; + if e.local_name().as_ref() == b"comment" && depth == 1 { + if let Some(id) = current_id { + comments.push(Comment::new( + id, + current_author.clone(), + current_initials.clone(), + current_date.clone(), + current_comment_xml.clone(), 
+ )); } + in_comment = false; + } else { + depth -= 1; } }, - Ok(Event::Empty(e)) => { - if in_comment { - current_comment_xml.extend_from_slice(b"<"); - current_comment_xml.extend_from_slice(e.name().as_ref()); - for attr in e.attributes().flatten() { - current_comment_xml.extend_from_slice(b" "); - current_comment_xml.extend_from_slice(attr.key.as_ref()); - current_comment_xml.extend_from_slice(b"=\""); - current_comment_xml.extend_from_slice(&attr.value); - current_comment_xml.extend_from_slice(b"\""); - } - current_comment_xml.extend_from_slice(b"/>"); + Ok(Event::Empty(e)) if in_comment => { + current_comment_xml.extend_from_slice(b"<"); + current_comment_xml.extend_from_slice(e.name().as_ref()); + for attr in e.attributes().flatten() { + current_comment_xml.extend_from_slice(b" "); + current_comment_xml.extend_from_slice(attr.key.as_ref()); + current_comment_xml.extend_from_slice(b"=\""); + current_comment_xml.extend_from_slice(&attr.value); + current_comment_xml.extend_from_slice(b"\""); } + current_comment_xml.extend_from_slice(b"/>"); }, Ok(Event::Text(e)) if in_comment => { current_comment_xml.extend_from_slice(e.as_ref()); diff --git a/src/ooxml/docx/content_control.rs b/src/ooxml/docx/content_control.rs index 3f05111..6019083 100644 --- a/src/ooxml/docx/content_control.rs +++ b/src/ooxml/docx/content_control.rs @@ -186,21 +186,19 @@ impl ContentControl { _ => {}, } }, - Ok(Event::End(e)) => { - if e.local_name().as_ref() == b"sdtPr" { - // End of content control properties - if let Some(id) = current_id { - controls.push(ContentControl::new( - id, - current_tag.clone(), - current_title.clone(), - current_type.clone(), - current_lock_delete, - current_lock_content, - )); - } - in_sdt_pr = false; + Ok(Event::End(e)) if e.local_name().as_ref() == b"sdtPr" => { + // End of content control properties + if let Some(id) = current_id { + controls.push(ContentControl::new( + id, + current_tag.clone(), + current_title.clone(), + current_type.clone(), + 
current_lock_delete, + current_lock_content, + )); } + in_sdt_pr = false; }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), diff --git a/src/ooxml/docx/document.rs b/src/ooxml/docx/document.rs index 2b643fb..b95c1d4 100644 --- a/src/ooxml/docx/document.rs +++ b/src/ooxml/docx/document.rs @@ -336,19 +336,17 @@ impl<'a> Document<'a> { sect_pr_content.extend_from_slice(b">"); } }, - Ok(Event::End(e)) => { - if in_sect_pr { - if e.local_name().as_ref() == b"sectPr" && depth == 1 { - // End of sectPr element - sect_pr_content.extend_from_slice(b""); - sections_xml.push(Section::from_xml_bytes(sect_pr_content.clone())?); - in_sect_pr = false; - } else { - depth -= 1; - sect_pr_content.extend_from_slice(b""); - } + Ok(Event::End(e)) if in_sect_pr => { + if e.local_name().as_ref() == b"sectPr" && depth == 1 { + // End of sectPr element + sect_pr_content.extend_from_slice(b""); + sections_xml.push(Section::from_xml_bytes(sect_pr_content.clone())?); + in_sect_pr = false; + } else { + depth -= 1; + sect_pr_content.extend_from_slice(b""); } }, Ok(Event::Empty(e)) if in_sect_pr => { diff --git a/src/ooxml/docx/field.rs b/src/ooxml/docx/field.rs index d8564bf..f0dfc03 100644 --- a/src/ooxml/docx/field.rs +++ b/src/ooxml/docx/field.rs @@ -151,13 +151,12 @@ impl Field { in_field_result = false; } }, - "separate" => { + "separate" // Separator between instruction and result - if field_depth == 1 { + if field_depth == 1 => { in_instr_text = false; in_field_result = true; - } - }, + }, "end" => { // End of field if field_depth == 1 { @@ -183,12 +182,11 @@ impl Field { } } }, - b"instrText" => { + b"instrText" // Field instruction text - if field_depth > 0 { + if field_depth > 0 => { in_instr_text = true; - } - }, + }, b"t" => { // Text element - could be part of field result // Will be handled in Text event @@ -207,10 +205,8 @@ impl Field { current_result.push_str(text); } }, - Ok(Event::End(e)) => { - if e.local_name().as_ref() == b"instrText" { - 
in_instr_text = false; - } + Ok(Event::End(e)) if e.local_name().as_ref() == b"instrText" => { + in_instr_text = false; }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), diff --git a/src/ooxml/docx/footnote.rs b/src/ooxml/docx/footnote.rs index adc97c8..5995166 100644 --- a/src/ooxml/docx/footnote.rs +++ b/src/ooxml/docx/footnote.rs @@ -139,19 +139,15 @@ impl Note { loop { match reader.read_event() { - Ok(Event::Start(e)) | Ok(Event::Empty(e)) => { - if e.local_name().as_ref() == b"t" { - in_text_element = true; - } + Ok(Event::Start(e)) | Ok(Event::Empty(e)) if e.local_name().as_ref() == b"t" => { + in_text_element = true; }, Ok(Event::Text(e)) if in_text_element => { let text = unsafe { std::str::from_utf8_unchecked(e.as_ref()) }; result.push_str(text); }, - Ok(Event::End(e)) => { - if e.local_name().as_ref() == b"t" { - in_text_element = false; - } + Ok(Event::End(e)) if e.local_name().as_ref() == b"t" => { + in_text_element = false; }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), @@ -223,33 +219,29 @@ impl Note { current_para_xml.extend_from_slice(b">"); } }, - Ok(Event::End(e)) => { - if in_para { - current_para_xml.extend_from_slice(b""); - - if e.local_name().as_ref() == b"p" && depth == 1 { - paragraphs.push(Paragraph::new(current_para_xml.clone())); - in_para = false; - } else { - depth -= 1; - } + Ok(Event::End(e)) if in_para => { + current_para_xml.extend_from_slice(b""); + + if e.local_name().as_ref() == b"p" && depth == 1 { + paragraphs.push(Paragraph::new(current_para_xml.clone())); + in_para = false; + } else { + depth -= 1; } }, - Ok(Event::Empty(e)) => { - if in_para { - current_para_xml.extend_from_slice(b"<"); - current_para_xml.extend_from_slice(e.name().as_ref()); - for attr in e.attributes().flatten() { - current_para_xml.extend_from_slice(b" "); - current_para_xml.extend_from_slice(attr.key.as_ref()); - current_para_xml.extend_from_slice(b"=\""); - 
current_para_xml.extend_from_slice(&attr.value); - current_para_xml.extend_from_slice(b"\""); - } - current_para_xml.extend_from_slice(b"/>"); + Ok(Event::Empty(e)) if in_para => { + current_para_xml.extend_from_slice(b"<"); + current_para_xml.extend_from_slice(e.name().as_ref()); + for attr in e.attributes().flatten() { + current_para_xml.extend_from_slice(b" "); + current_para_xml.extend_from_slice(attr.key.as_ref()); + current_para_xml.extend_from_slice(b"=\""); + current_para_xml.extend_from_slice(&attr.value); + current_para_xml.extend_from_slice(b"\""); } + current_para_xml.extend_from_slice(b"/>"); }, Ok(Event::Text(e)) if in_para => { current_para_xml.extend_from_slice(e.as_ref()); @@ -352,43 +344,35 @@ impl Note { current_note_xml.extend_from_slice(b">"); } }, - Ok(Event::End(e)) => { - if in_note { - current_note_xml.extend_from_slice(b""); - - if e.local_name().as_ref() == note_tag && depth == 1 { - // End of note element - if let Some(id) = current_id { - // Skip separator notes (negative IDs or special types) - if id > 0 && current_type.is_normal() { - notes.push(Note::new( - id, - current_note_xml.clone(), - current_type, - )); - } + Ok(Event::End(e)) if in_note => { + current_note_xml.extend_from_slice(b""); + + if e.local_name().as_ref() == note_tag && depth == 1 { + // End of note element + if let Some(id) = current_id { + // Skip separator notes (negative IDs or special types) + if id > 0 && current_type.is_normal() { + notes.push(Note::new(id, current_note_xml.clone(), current_type)); } - in_note = false; - } else { - depth -= 1; } + in_note = false; + } else { + depth -= 1; } }, - Ok(Event::Empty(e)) => { - if in_note { - current_note_xml.extend_from_slice(b"<"); - current_note_xml.extend_from_slice(e.name().as_ref()); - for attr in e.attributes().flatten() { - current_note_xml.extend_from_slice(b" "); - current_note_xml.extend_from_slice(attr.key.as_ref()); - current_note_xml.extend_from_slice(b"=\""); - 
current_note_xml.extend_from_slice(&attr.value); - current_note_xml.extend_from_slice(b"\""); - } - current_note_xml.extend_from_slice(b"/>"); + Ok(Event::Empty(e)) if in_note => { + current_note_xml.extend_from_slice(b"<"); + current_note_xml.extend_from_slice(e.name().as_ref()); + for attr in e.attributes().flatten() { + current_note_xml.extend_from_slice(b" "); + current_note_xml.extend_from_slice(attr.key.as_ref()); + current_note_xml.extend_from_slice(b"=\""); + current_note_xml.extend_from_slice(&attr.value); + current_note_xml.extend_from_slice(b"\""); } + current_note_xml.extend_from_slice(b"/>"); }, Ok(Event::Text(e)) if in_note => { current_note_xml.extend_from_slice(e.as_ref()); diff --git a/src/ooxml/docx/header_footer.rs b/src/ooxml/docx/header_footer.rs index ed930ce..912f9e7 100644 --- a/src/ooxml/docx/header_footer.rs +++ b/src/ooxml/docx/header_footer.rs @@ -115,20 +115,16 @@ impl HeaderFooter { loop { match reader.read_event() { - Ok(Event::Start(e)) | Ok(Event::Empty(e)) => { - if e.local_name().as_ref() == b"t" { - in_text_element = true; - } + Ok(Event::Start(e)) | Ok(Event::Empty(e)) if e.local_name().as_ref() == b"t" => { + in_text_element = true; }, Ok(Event::Text(e)) if in_text_element => { // Use unsafe conversion for better performance (safe since XML is validated) let text = unsafe { std::str::from_utf8_unchecked(e.as_ref()) }; result.push_str(text); }, - Ok(Event::End(e)) => { - if e.local_name().as_ref() == b"t" { - in_text_element = false; - } + Ok(Event::End(e)) if e.local_name().as_ref() == b"t" => { + in_text_element = false; }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), @@ -201,33 +197,29 @@ impl HeaderFooter { current_para_xml.extend_from_slice(b">"); } }, - Ok(Event::End(e)) => { - if in_para { - current_para_xml.extend_from_slice(b""); - - if e.local_name().as_ref() == b"p" && depth == 1 { - paragraphs.push(Paragraph::new(current_para_xml.clone())); - in_para = false; - } else { - depth -= 1; 
- } + Ok(Event::End(e)) if in_para => { + current_para_xml.extend_from_slice(b""); + + if e.local_name().as_ref() == b"p" && depth == 1 { + paragraphs.push(Paragraph::new(current_para_xml.clone())); + in_para = false; + } else { + depth -= 1; } }, - Ok(Event::Empty(e)) => { - if in_para { - current_para_xml.extend_from_slice(b"<"); - current_para_xml.extend_from_slice(e.name().as_ref()); - for attr in e.attributes().flatten() { - current_para_xml.extend_from_slice(b" "); - current_para_xml.extend_from_slice(attr.key.as_ref()); - current_para_xml.extend_from_slice(b"=\""); - current_para_xml.extend_from_slice(&attr.value); - current_para_xml.extend_from_slice(b"\""); - } - current_para_xml.extend_from_slice(b"/>"); + Ok(Event::Empty(e)) if in_para => { + current_para_xml.extend_from_slice(b"<"); + current_para_xml.extend_from_slice(e.name().as_ref()); + for attr in e.attributes().flatten() { + current_para_xml.extend_from_slice(b" "); + current_para_xml.extend_from_slice(attr.key.as_ref()); + current_para_xml.extend_from_slice(b"=\""); + current_para_xml.extend_from_slice(&attr.value); + current_para_xml.extend_from_slice(b"\""); } + current_para_xml.extend_from_slice(b"/>"); }, Ok(Event::Text(e)) if in_para => { current_para_xml.extend_from_slice(e.as_ref()); @@ -306,33 +298,29 @@ impl HeaderFooter { current_table_xml.extend_from_slice(b">"); } }, - Ok(Event::End(e)) => { - if in_table { - current_table_xml.extend_from_slice(b""); - - if e.local_name().as_ref() == b"tbl" && depth == 1 { - tables.push(Table::new(current_table_xml.clone())); - in_table = false; - } else { - depth -= 1; - } + Ok(Event::End(e)) if in_table => { + current_table_xml.extend_from_slice(b""); + + if e.local_name().as_ref() == b"tbl" && depth == 1 { + tables.push(Table::new(current_table_xml.clone())); + in_table = false; + } else { + depth -= 1; } }, - Ok(Event::Empty(e)) => { - if in_table { - current_table_xml.extend_from_slice(b"<"); - 
current_table_xml.extend_from_slice(e.name().as_ref()); - for attr in e.attributes().flatten() { - current_table_xml.extend_from_slice(b" "); - current_table_xml.extend_from_slice(attr.key.as_ref()); - current_table_xml.extend_from_slice(b"=\""); - current_table_xml.extend_from_slice(&attr.value); - current_table_xml.extend_from_slice(b"\""); - } - current_table_xml.extend_from_slice(b"/>"); + Ok(Event::Empty(e)) if in_table => { + current_table_xml.extend_from_slice(b"<"); + current_table_xml.extend_from_slice(e.name().as_ref()); + for attr in e.attributes().flatten() { + current_table_xml.extend_from_slice(b" "); + current_table_xml.extend_from_slice(attr.key.as_ref()); + current_table_xml.extend_from_slice(b"=\""); + current_table_xml.extend_from_slice(&attr.value); + current_table_xml.extend_from_slice(b"\""); } + current_table_xml.extend_from_slice(b"/>"); }, Ok(Event::Text(e)) if in_table => { current_table_xml.extend_from_slice(e.as_ref()); @@ -360,10 +348,8 @@ impl HeaderFooter { loop { match reader.read_event() { - Ok(Event::Start(e)) | Ok(Event::Empty(e)) => { - if e.local_name().as_ref() == b"p" { - count += 1; - } + Ok(Event::Start(e)) | Ok(Event::Empty(e)) if e.local_name().as_ref() == b"p" => { + count += 1; }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), @@ -383,10 +369,8 @@ impl HeaderFooter { loop { match reader.read_event() { - Ok(Event::Start(e)) => { - if e.local_name().as_ref() == b"tbl" { - count += 1; - } + Ok(Event::Start(e)) if e.local_name().as_ref() == b"tbl" => { + count += 1; }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), diff --git a/src/ooxml/docx/paragraph.rs b/src/ooxml/docx/paragraph.rs index b000d60..c1a8081 100644 --- a/src/ooxml/docx/paragraph.rs +++ b/src/ooxml/docx/paragraph.rs @@ -122,20 +122,16 @@ impl Paragraph { loop { match reader.read_event() { - Ok(Event::Start(e)) | Ok(Event::Empty(e)) => { - if e.local_name().as_ref() == b"t" { - in_text_element = 
true; - } + Ok(Event::Start(e)) | Ok(Event::Empty(e)) if e.local_name().as_ref() == b"t" => { + in_text_element = true; }, Ok(Event::Text(e)) if in_text_element => { // Use unsafe conversion for better performance (safe since we validate XML) let text = unsafe { std::str::from_utf8_unchecked(e.as_ref()) }; result.push_str(text); }, - Ok(Event::End(e)) => { - if e.local_name().as_ref() == b"t" { - in_text_element = false; - } + Ok(Event::End(e)) if e.local_name().as_ref() == b"t" => { + in_text_element = false; }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), @@ -652,10 +648,8 @@ impl Run { let text = unsafe { std::str::from_utf8_unchecked(e.as_ref()) }; result.push_str(text); }, - Ok(Event::End(e)) => { - if e.local_name().as_ref() == b"t" { - in_text_element = false; - } + Ok(Event::End(e)) if e.local_name().as_ref() == b"t" => { + in_text_element = false; }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), @@ -707,10 +701,8 @@ impl Run { return Ok(Some(true)); } }, - Ok(Event::End(e)) => { - if e.local_name().as_ref() == b"rPr" { - in_r_pr = false; - } + Ok(Event::End(e)) if e.local_name().as_ref() == b"rPr" => { + in_r_pr = false; }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), @@ -838,11 +830,9 @@ impl Run { } } }, - Ok(Event::Text(e)) => { - if in_text_element { - let text_str = unsafe { std::str::from_utf8_unchecked(e.as_ref()) }; - text.push_str(text_str); - } + Ok(Event::Text(e)) if in_text_element => { + let text_str = unsafe { std::str::from_utf8_unchecked(e.as_ref()) }; + text.push_str(text_str); }, Ok(Event::End(e)) => { let name = e.local_name(); @@ -964,11 +954,9 @@ impl Run { } } }, - Ok(Event::End(e)) => { - if e.local_name().as_ref() == b"rPr" { - // Exit early once we've finished parsing rPr - return Ok(props); - } + Ok(Event::End(e)) if e.local_name().as_ref() == b"rPr" => { + // Exit early once we've finished parsing rPr + return Ok(props); }, 
Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), @@ -1009,10 +997,8 @@ impl Run { } } }, - Ok(Event::End(e)) => { - if e.local_name().as_ref() == b"rPr" { - in_r_pr = false; - } + Ok(Event::End(e)) if e.local_name().as_ref() == b"rPr" => { + in_r_pr = false; }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), @@ -1049,10 +1035,8 @@ impl Run { } } }, - Ok(Event::End(e)) => { - if e.local_name().as_ref() == b"rPr" { - break; - } + Ok(Event::End(e)) if e.local_name().as_ref() == b"rPr" => { + break; }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), @@ -1090,10 +1074,8 @@ impl Run { } } }, - Ok(Event::End(e)) => { - if e.local_name().as_ref() == b"rPr" { - break; - } + Ok(Event::End(e)) if e.local_name().as_ref() == b"rPr" => { + break; }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), @@ -1345,10 +1327,8 @@ impl Run { return Ok(Some(true)); } }, - Ok(Event::End(e)) => { - if e.local_name().as_ref() == b"rPr" { - in_r_pr = false; - } + Ok(Event::End(e)) if e.local_name().as_ref() == b"rPr" => { + in_r_pr = false; }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), diff --git a/src/ooxml/docx/parts/document_part.rs b/src/ooxml/docx/parts/document_part.rs index fb712b4..eeb0eaf 100644 --- a/src/ooxml/docx/parts/document_part.rs +++ b/src/ooxml/docx/parts/document_part.rs @@ -61,22 +61,20 @@ impl<'a> DocumentPart<'a> { // Use read_event() for zero-copy parsing from slice loop { match reader.read_event() { - Ok(Event::Start(e)) | Ok(Event::Empty(e)) => { + Ok(Event::Start(e)) | Ok(Event::Empty(e)) // Check if this is a w:t element - if e.local_name().as_ref() == b"t" { + if e.local_name().as_ref() == b"t" => { in_text_element = true; - } - }, + }, Ok(Event::Text(e)) if in_text_element => { // Extract text content - use unsafe conversion for better performance let text = unsafe { std::str::from_utf8_unchecked(e.as_ref()) 
}; result.push_str(text); }, - Ok(Event::End(e)) => { - if e.local_name().as_ref() == b"t" { + Ok(Event::End(e)) + if e.local_name().as_ref() == b"t" => { in_text_element = false; - } - }, + }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), _ => {}, @@ -97,10 +95,8 @@ impl<'a> DocumentPart<'a> { loop { match reader.read_event() { - Ok(Event::Start(e)) | Ok(Event::Empty(e)) => { - if e.local_name().as_ref() == b"p" { - count += 1; - } + Ok(Event::Start(e)) | Ok(Event::Empty(e)) if e.local_name().as_ref() == b"p" => { + count += 1; }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), @@ -122,10 +118,8 @@ impl<'a> DocumentPart<'a> { loop { match reader.read_event() { - Ok(Event::Start(e)) => { - if e.local_name().as_ref() == b"tbl" { - count += 1; - } + Ok(Event::Start(e)) if e.local_name().as_ref() == b"tbl" => { + count += 1; }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), @@ -171,17 +165,15 @@ impl<'a> DocumentPart<'a> { write_start_tag_dynamic(&mut current_para_xml, &e); } }, - Ok(Event::End(e)) => { - if in_para { - write_end_tag(&mut current_para_xml, e.name().as_ref()); - depth -= 1; - if depth == 0 && e.local_name().as_ref() == b"p" { - // Clone bytes and clear buffer (preserves capacity for next element) - let para_xml = current_para_xml.clone(); - current_para_xml.clear(); - paragraphs.push(Paragraph::new(para_xml)); - in_para = false; - } + Ok(Event::End(e)) if in_para => { + write_end_tag(&mut current_para_xml, e.name().as_ref()); + depth -= 1; + if depth == 0 && e.local_name().as_ref() == b"p" { + // Clone bytes and clear buffer (preserves capacity for next element) + let para_xml = current_para_xml.clone(); + current_para_xml.clear(); + paragraphs.push(Paragraph::new(para_xml)); + in_para = false; } }, Ok(Event::Text(e)) if in_para => { @@ -231,17 +223,15 @@ impl<'a> DocumentPart<'a> { write_start_tag_dynamic(&mut current_table_xml, &e); } }, - 
Ok(Event::End(e)) => { - if in_table { - write_end_tag(&mut current_table_xml, e.name().as_ref()); - depth -= 1; - if depth == 0 && e.local_name().as_ref() == b"tbl" { - // Clone bytes and clear buffer (preserves capacity for next element) - let table_xml = current_table_xml.clone(); - current_table_xml.clear(); - tables.push(Table::new(table_xml)); - in_table = false; - } + Ok(Event::End(e)) if in_table => { + write_end_tag(&mut current_table_xml, e.name().as_ref()); + depth -= 1; + if depth == 0 && e.local_name().as_ref() == b"tbl" { + // Clone bytes and clear buffer (preserves capacity for next element) + let table_xml = current_table_xml.clone(); + current_table_xml.clear(); + tables.push(Table::new(table_xml)); + in_table = false; } }, Ok(Event::Text(e)) if in_table => { diff --git a/src/ooxml/docx/table.rs b/src/ooxml/docx/table.rs index 7cfbd2e..de13cf7 100644 --- a/src/ooxml/docx/table.rs +++ b/src/ooxml/docx/table.rs @@ -115,10 +115,8 @@ impl Table { loop { match reader.read_event() { - Ok(Event::Start(e)) => { - if e.local_name().as_ref() == b"tr" { - count += 1; - } + Ok(Event::Start(e)) if e.local_name().as_ref() == b"tr" => { + count += 1; }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), @@ -198,20 +196,18 @@ impl Table { current_row_xml.push(b'>'); } }, - Ok(Event::End(e)) => { - if in_row { - current_row_xml.extend_from_slice(b"'); + Ok(Event::End(e)) if in_row => { + current_row_xml.extend_from_slice(b"'); - depth -= 1; - if depth == 0 && e.local_name().as_ref() == b"tr" { - // Clone bytes and clear buffer (preserves capacity for next row) - let row_xml = current_row_xml.clone(); - current_row_xml.clear(); - rows.push(Row::new(row_xml)); - in_row = false; - } + depth -= 1; + if depth == 0 && e.local_name().as_ref() == b"tr" { + // Clone bytes and clear buffer (preserves capacity for next row) + let row_xml = current_row_xml.clone(); + current_row_xml.clear(); + rows.push(Row::new(row_xml)); + in_row = false; } }, 
Ok(Event::Text(e)) if in_row => { @@ -297,10 +293,8 @@ impl Row { loop { match reader.read_event() { - Ok(Event::Start(e)) => { - if e.local_name().as_ref() == b"tc" { - count += 1; - } + Ok(Event::Start(e)) if e.local_name().as_ref() == b"tc" => { + count += 1; }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), @@ -368,20 +362,18 @@ impl Row { current_cell_xml.push(b'>'); } }, - Ok(Event::End(e)) => { - if in_cell { - current_cell_xml.extend_from_slice(b"'); + Ok(Event::End(e)) if in_cell => { + current_cell_xml.extend_from_slice(b"'); - depth -= 1; - if depth == 0 && e.local_name().as_ref() == b"tc" { - // Clone bytes and clear buffer (preserves capacity for next cell) - let cell_xml = current_cell_xml.clone(); - current_cell_xml.clear(); - cells.push(Cell::new(cell_xml)); - in_cell = false; - } + depth -= 1; + if depth == 0 && e.local_name().as_ref() == b"tc" { + // Clone bytes and clear buffer (preserves capacity for next cell) + let cell_xml = current_cell_xml.clone(); + current_cell_xml.clear(); + cells.push(Cell::new(cell_xml)); + in_cell = false; } }, Ok(Event::Text(e)) if in_cell => { @@ -486,10 +478,8 @@ impl Cell { return Ok(1); } }, - Ok(Event::End(e)) => { - if e.local_name().as_ref() == b"tcPr" { - in_tc_pr = false; - } + Ok(Event::End(e)) if e.local_name().as_ref() == b"tcPr" => { + in_tc_pr = false; }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), @@ -555,10 +545,8 @@ impl Cell { return Ok(Some(VMergeState::Continue)); } }, - Ok(Event::End(e)) => { - if e.local_name().as_ref() == b"tcPr" { - in_tc_pr = false; - } + Ok(Event::End(e)) if e.local_name().as_ref() == b"tcPr" => { + in_tc_pr = false; }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), @@ -600,19 +588,15 @@ impl Cell { loop { match reader.read_event() { - Ok(Event::Start(e)) | Ok(Event::Empty(e)) => { - if e.local_name().as_ref() == b"t" { - in_text_element = true; - } + Ok(Event::Start(e)) | 
Ok(Event::Empty(e)) if e.local_name().as_ref() == b"t" => { + in_text_element = true; }, Ok(Event::Text(e)) if in_text_element => { let text = std::str::from_utf8(e.as_ref()).unwrap_or(""); result.push_str(text); }, - Ok(Event::End(e)) => { - if e.local_name().as_ref() == b"t" { - in_text_element = false; - } + Ok(Event::End(e)) if e.local_name().as_ref() == b"t" => { + in_text_element = false; }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), @@ -668,20 +652,18 @@ impl Cell { current_para_xml.push(b'>'); } }, - Ok(Event::End(e)) => { - if in_para { - current_para_xml.extend_from_slice(b"'); + Ok(Event::End(e)) if in_para => { + current_para_xml.extend_from_slice(b"'); - depth -= 1; - if depth == 0 && e.local_name().as_ref() == b"p" { - // Clone bytes and clear buffer (preserves capacity for next paragraph) - let para_xml = current_para_xml.clone(); - current_para_xml.clear(); - paragraphs.push(Paragraph::new(para_xml)); - in_para = false; - } + depth -= 1; + if depth == 0 && e.local_name().as_ref() == b"p" { + // Clone bytes and clear buffer (preserves capacity for next paragraph) + let para_xml = current_para_xml.clone(); + current_para_xml.clear(); + paragraphs.push(Paragraph::new(para_xml)); + in_para = false; } }, Ok(Event::Text(e)) if in_para => { diff --git a/src/ooxml/docx/variables.rs b/src/ooxml/docx/variables.rs index 445a501..b6b5e53 100644 --- a/src/ooxml/docx/variables.rs +++ b/src/ooxml/docx/variables.rs @@ -88,26 +88,26 @@ impl DocumentVariables { loop { match reader.read_event() { - Ok(Event::Empty(e)) | Ok(Event::Start(e)) => { - if e.local_name().as_ref() == b"docVar" { - let mut name = None; - let mut value = None; - - for attr in e.attributes().flatten() { - match attr.key.local_name().as_ref() { - b"name" => { - name = Some(String::from_utf8_lossy(&attr.value).into_owned()); - }, - b"val" => { - value = Some(String::from_utf8_lossy(&attr.value).into_owned()); - }, - _ => {}, - } + Ok(Event::Empty(e)) | 
Ok(Event::Start(e)) + if e.local_name().as_ref() == b"docVar" => + { + let mut name = None; + let mut value = None; + + for attr in e.attributes().flatten() { + match attr.key.local_name().as_ref() { + b"name" => { + name = Some(String::from_utf8_lossy(&attr.value).into_owned()); + }, + b"val" => { + value = Some(String::from_utf8_lossy(&attr.value).into_owned()); + }, + _ => {}, } + } - if let (Some(n), Some(v)) = (name, value) { - variables.insert(n, v); - } + if let (Some(n), Some(v)) = (name, value) { + variables.insert(n, v); } }, Ok(Event::Eof) => break, diff --git a/src/ooxml/opc/part.rs b/src/ooxml/opc/part.rs index b290d6d..7cac0d6 100644 --- a/src/ooxml/opc/part.rs +++ b/src/ooxml/opc/part.rs @@ -237,19 +237,18 @@ impl XmlPart { loop { match reader.read_event() { - Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => { + Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) // Fast byte-level comparison - if e.local_name().as_ref() == element_name_bytes { + if e.local_name().as_ref() == element_name_bytes => { in_target_element = true; - } - }, + }, Ok(Event::Text(e)) if in_target_element => { // Efficiently decode text without unnecessary allocation let text = std::str::from_utf8(e.as_ref())?; text_content.push_str(text); }, - Ok(Event::End(ref e)) => { - if e.local_name().as_ref() == element_name_bytes { + Ok(Event::End(ref e)) + if e.local_name().as_ref() == element_name_bytes => { in_target_element = false; if !text_content.is_empty() { // Cache the result @@ -257,8 +256,7 @@ impl XmlPart { .insert(element_name.to_string(), text_content.clone()); return Ok(Some(text_content)); } - } - }, + }, Ok(Event::Eof) => break, Err(e) => return Err(OpcError::XmlError(format!("XML parse error: {}", e))), _ => {}, @@ -282,17 +280,17 @@ impl XmlPart { loop { match reader.read_event() { - Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => { - if e.local_name().as_ref() == element_name_bytes { - let mut attrs = HashMap::new(); - for attr in e.attributes() { - let attr 
= attr?; - let key = std::str::from_utf8(attr.key.as_ref())?; - let value = attr.decode_and_unescape_value(reader.decoder())?; - attrs.insert(key.to_string(), value.to_string()); - } - results.push(attrs); + Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) + if e.local_name().as_ref() == element_name_bytes => + { + let mut attrs = HashMap::new(); + for attr in e.attributes() { + let attr = attr?; + let key = std::str::from_utf8(attr.key.as_ref())?; + let value = attr.decode_and_unescape_value(reader.decoder())?; + attrs.insert(key.to_string(), value.to_string()); } + results.push(attrs); }, Ok(Event::Eof) => break, Err(e) => return Err(OpcError::XmlError(format!("XML parse error: {}", e))), diff --git a/src/ooxml/opc/pkgreader.rs b/src/ooxml/opc/pkgreader.rs index 115edcd..206640d 100644 --- a/src/ooxml/opc/pkgreader.rs +++ b/src/ooxml/opc/pkgreader.rs @@ -279,52 +279,52 @@ impl PackageReader { loop { match reader.read_event() { - Ok(Event::Empty(ref e)) | Ok(Event::Start(ref e)) => { - if e.local_name().as_ref() == b"Relationship" { - let mut r_id = None; - let mut reltype = None; - let mut target_ref = None; - let mut target_mode = target_mode::INTERNAL.to_string(); - - for attr in e.attributes() { - let attr = attr?; - match attr.key.as_ref() { - b"Id" => { - r_id = Some( - attr.decode_and_unescape_value(reader.decoder())? - .to_string(), - ) - }, - b"Type" => { - reltype = Some( - attr.decode_and_unescape_value(reader.decoder())? - .to_string(), - ) - }, - b"Target" => { - target_ref = Some( - attr.decode_and_unescape_value(reader.decoder())? - .to_string(), - ) - }, - b"TargetMode" => { - target_mode = attr - .decode_and_unescape_value(reader.decoder())? 
- .to_string() - }, - _ => {}, - } + Ok(Event::Empty(ref e)) | Ok(Event::Start(ref e)) + if e.local_name().as_ref() == b"Relationship" => + { + let mut r_id = None; + let mut reltype = None; + let mut target_ref = None; + let mut target_mode = target_mode::INTERNAL.to_string(); + + for attr in e.attributes() { + let attr = attr?; + match attr.key.as_ref() { + b"Id" => { + r_id = Some( + attr.decode_and_unescape_value(reader.decoder())? + .to_string(), + ) + }, + b"Type" => { + reltype = Some( + attr.decode_and_unescape_value(reader.decoder())? + .to_string(), + ) + }, + b"Target" => { + target_ref = Some( + attr.decode_and_unescape_value(reader.decoder())? + .to_string(), + ) + }, + b"TargetMode" => { + target_mode = attr + .decode_and_unescape_value(reader.decoder())? + .to_string() + }, + _ => {}, } + } - if let (Some(id), Some(rt), Some(tr)) = (r_id, reltype, target_ref) { - srels.push(SerializedRelationship { - base_uri: base_uri.to_string(), - r_id: id, - reltype: rt, - target_ref: tr, - target_mode, - }); - } + if let (Some(id), Some(rt), Some(tr)) = (r_id, reltype, target_ref) { + srels.push(SerializedRelationship { + base_uri: base_uri.to_string(), + r_id: id, + reltype: rt, + target_ref: tr, + target_mode, + }); } }, Ok(Event::Eof) => break, diff --git a/src/ooxml/pptx/backgrounds.rs b/src/ooxml/pptx/backgrounds.rs index f087416..41310a1 100644 --- a/src/ooxml/pptx/backgrounds.rs +++ b/src/ooxml/pptx/backgrounds.rs @@ -330,10 +330,8 @@ impl SlideBackground { } } }, - Ok(Event::End(ref e)) => { - if e.local_name().as_ref() == b"bg" { - in_bg = false; - } + Ok(Event::End(ref e)) if e.local_name().as_ref() == b"bg" => { + in_bg = false; }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), @@ -444,12 +442,10 @@ impl SlideBackground { } } }, - Ok(Event::End(ref e)) => { - if e.local_name().as_ref() == b"gradFill" { - depth -= 1; - if depth == 0 { - break; - } + Ok(Event::End(ref e)) if e.local_name().as_ref() == b"gradFill" => { + 
depth -= 1; + if depth == 0 { + break; } }, Ok(Event::Eof) => break, diff --git a/src/ooxml/pptx/customshow.rs b/src/ooxml/pptx/customshow.rs index eeebd23..724ba6b 100644 --- a/src/ooxml/pptx/customshow.rs +++ b/src/ooxml/pptx/customshow.rs @@ -124,27 +124,24 @@ impl CustomShowList { loop { match reader.read_event() { - Ok(Event::Start(e)) => { - if e.local_name().as_ref() == b"custShow" { - let mut name = String::new(); - let mut id = 0u32; - for attr in e.attributes().flatten() { - match attr.key.as_ref() { - b"name" => { - name = - std::str::from_utf8(&attr.value).unwrap_or("").to_string(); - }, - b"id" => { - id = std::str::from_utf8(&attr.value) - .ok() - .and_then(|s| s.parse().ok()) - .unwrap_or(0); - }, - _ => {}, - } + Ok(Event::Start(e)) if e.local_name().as_ref() == b"custShow" => { + let mut name = String::new(); + let mut id = 0u32; + for attr in e.attributes().flatten() { + match attr.key.as_ref() { + b"name" => { + name = std::str::from_utf8(&attr.value).unwrap_or("").to_string(); + }, + b"id" => { + id = std::str::from_utf8(&attr.value) + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(0); + }, + _ => {}, } - current_show = Some(CustomShow::new(id, name)); } + current_show = Some(CustomShow::new(id, name)); }, Ok(Event::Empty(e)) => { if e.local_name().as_ref() == b"sld" diff --git a/src/ooxml/pptx/parts/comment.rs b/src/ooxml/pptx/parts/comment.rs index 31acc79..423522b 100644 --- a/src/ooxml/pptx/parts/comment.rs +++ b/src/ooxml/pptx/parts/comment.rs @@ -355,36 +355,36 @@ impl<'a> CommentAuthorsPart<'a> { loop { match reader.read_event() { - Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => { - if e.local_name().as_ref() == b"cmAuthor" { - let mut id = 0; - let mut name = String::new(); - let mut initials = String::new(); - - for attr in e.attributes().flatten() { - match attr.key.as_ref() { - b"id" => { - id = std::str::from_utf8(&attr.value) - .ok() - .and_then(|s| s.parse().ok()) - .unwrap_or(0); - }, - b"name" => { - name = 
std::str::from_utf8(&attr.value) - .map(|s| s.to_string()) - .unwrap_or_default(); - }, - b"initials" => { - initials = std::str::from_utf8(&attr.value) - .map(|s| s.to_string()) - .unwrap_or_default(); - }, - _ => {}, - } + Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) + if e.local_name().as_ref() == b"cmAuthor" => + { + let mut id = 0; + let mut name = String::new(); + let mut initials = String::new(); + + for attr in e.attributes().flatten() { + match attr.key.as_ref() { + b"id" => { + id = std::str::from_utf8(&attr.value) + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(0); + }, + b"name" => { + name = std::str::from_utf8(&attr.value) + .map(|s| s.to_string()) + .unwrap_or_default(); + }, + b"initials" => { + initials = std::str::from_utf8(&attr.value) + .map(|s| s.to_string()) + .unwrap_or_default(); + }, + _ => {}, } - - authors.push(CommentAuthor { id, name, initials }); } + + authors.push(CommentAuthor { id, name, initials }); }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), diff --git a/src/ooxml/pptx/parts/presentation.rs b/src/ooxml/pptx/parts/presentation.rs index bbf6ca4..7b20dc7 100644 --- a/src/ooxml/pptx/parts/presentation.rs +++ b/src/ooxml/pptx/parts/presentation.rs @@ -62,10 +62,10 @@ impl<'a> PresentationPart<'a> { loop { match reader.read_event() { - Ok(Event::Start(e)) | Ok(Event::Empty(e)) => { - if e.local_name().as_ref() == b"sldId" { - count += 1; - } + Ok(Event::Start(e)) | Ok(Event::Empty(e)) + if e.local_name().as_ref() == b"sldId" => + { + count += 1; }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), @@ -93,16 +93,16 @@ impl<'a> PresentationPart<'a> { loop { match reader.read_event() { - Ok(Event::Start(e)) | Ok(Event::Empty(e)) => { - if e.local_name().as_ref() == b"sldSz" { - for attr in e.attributes().flatten() { - if attr.key.as_ref() == b"cx" { - let value = std::str::from_utf8(&attr.value) - .map_err(|e| OoxmlError::Xml(e.to_string()))?; - return 
value.parse::().map(Some).map_err(|e| { - OoxmlError::Xml(format!("Invalid slide width: {}", e)) - }); - } + Ok(Event::Start(e)) | Ok(Event::Empty(e)) + if e.local_name().as_ref() == b"sldSz" => + { + for attr in e.attributes().flatten() { + if attr.key.as_ref() == b"cx" { + let value = std::str::from_utf8(&attr.value) + .map_err(|e| OoxmlError::Xml(e.to_string()))?; + return value.parse::().map(Some).map_err(|e| { + OoxmlError::Xml(format!("Invalid slide width: {}", e)) + }); } } }, @@ -132,16 +132,16 @@ impl<'a> PresentationPart<'a> { loop { match reader.read_event() { - Ok(Event::Start(e)) | Ok(Event::Empty(e)) => { - if e.local_name().as_ref() == b"sldSz" { - for attr in e.attributes().flatten() { - if attr.key.as_ref() == b"cy" { - let value = std::str::from_utf8(&attr.value) - .map_err(|e| OoxmlError::Xml(e.to_string()))?; - return value.parse::().map(Some).map_err(|e| { - OoxmlError::Xml(format!("Invalid slide height: {}", e)) - }); - } + Ok(Event::Start(e)) | Ok(Event::Empty(e)) + if e.local_name().as_ref() == b"sldSz" => + { + for attr in e.attributes().flatten() { + if attr.key.as_ref() == b"cy" { + let value = std::str::from_utf8(&attr.value) + .map_err(|e| OoxmlError::Xml(e.to_string()))?; + return value.parse::().map(Some).map_err(|e| { + OoxmlError::Xml(format!("Invalid slide height: {}", e)) + }); } } }, @@ -175,24 +175,23 @@ impl<'a> PresentationPart<'a> { loop { match reader.read_event() { - Ok(Event::Start(e)) | Ok(Event::Empty(e)) => { - if e.local_name().as_ref() == b"sldId" { - for attr in e.attributes().flatten() { - // Look for r:id attribute (can be r:id or just id with relationships namespace) - let key = attr.key.as_ref(); - // Check if this is the relationship ID attribute - if key == b"r:id" - || (key.starts_with(b"r:") - && attr.key.local_name().as_ref() == b"id") - || attr.key.local_name().as_ref() == b"id" - { - let rid = std::str::from_utf8(&attr.value) - .map_err(|e| OoxmlError::Xml(e.to_string()))?; - // Only push if it looks like 
a relationship ID (starts with "rId") - if rid.starts_with("rId") { - rids.push(rid.to_string()); - break; - } + Ok(Event::Start(e)) | Ok(Event::Empty(e)) + if e.local_name().as_ref() == b"sldId" => + { + for attr in e.attributes().flatten() { + // Look for r:id attribute (can be r:id or just id with relationships namespace) + let key = attr.key.as_ref(); + // Check if this is the relationship ID attribute + if key == b"r:id" + || (key.starts_with(b"r:") && attr.key.local_name().as_ref() == b"id") + || attr.key.local_name().as_ref() == b"id" + { + let rid = std::str::from_utf8(&attr.value) + .map_err(|e| OoxmlError::Xml(e.to_string()))?; + // Only push if it looks like a relationship ID (starts with "rId") + if rid.starts_with("rId") { + rids.push(rid.to_string()); + break; } } } @@ -221,24 +220,23 @@ impl<'a> PresentationPart<'a> { loop { match reader.read_event() { - Ok(Event::Start(e)) | Ok(Event::Empty(e)) => { - if e.local_name().as_ref() == b"sldMasterId" { - for attr in e.attributes().flatten() { - // Look for r:id attribute (can be r:id or just id with relationships namespace) - let key = attr.key.as_ref(); - // Check if this is the relationship ID attribute - if key == b"r:id" - || (key.starts_with(b"r:") - && attr.key.local_name().as_ref() == b"id") - || attr.key.local_name().as_ref() == b"id" - { - let rid = std::str::from_utf8(&attr.value) - .map_err(|e| OoxmlError::Xml(e.to_string()))?; - // Only push if it looks like a relationship ID (starts with "rId") - if rid.starts_with("rId") { - rids.push(rid.to_string()); - break; - } + Ok(Event::Start(e)) | Ok(Event::Empty(e)) + if e.local_name().as_ref() == b"sldMasterId" => + { + for attr in e.attributes().flatten() { + // Look for r:id attribute (can be r:id or just id with relationships namespace) + let key = attr.key.as_ref(); + // Check if this is the relationship ID attribute + if key == b"r:id" + || (key.starts_with(b"r:") && attr.key.local_name().as_ref() == b"id") + || attr.key.local_name().as_ref() 
== b"id" + { + let rid = std::str::from_utf8(&attr.value) + .map_err(|e| OoxmlError::Xml(e.to_string()))?; + // Only push if it looks like a relationship ID (starts with "rId") + if rid.starts_with("rId") { + rids.push(rid.to_string()); + break; } } } diff --git a/src/ooxml/pptx/parts/slide.rs b/src/ooxml/pptx/parts/slide.rs index df4c965..19aef2d 100644 --- a/src/ooxml/pptx/parts/slide.rs +++ b/src/ooxml/pptx/parts/slide.rs @@ -36,14 +36,12 @@ impl<'a> SlidePart<'a> { loop { match reader.read_event() { - Ok(Event::Start(e)) | Ok(Event::Empty(e)) => { - if e.local_name().as_ref() == b"cSld" { - for attr in e.attributes().flatten() { - if attr.key.as_ref() == b"name" { - let name = std::str::from_utf8(&attr.value) - .map_err(|e| OoxmlError::Xml(e.to_string()))?; - return Ok(name.to_string()); - } + Ok(Event::Start(e)) | Ok(Event::Empty(e)) if e.local_name().as_ref() == b"cSld" => { + for attr in e.attributes().flatten() { + if attr.key.as_ref() == b"name" { + let name = std::str::from_utf8(&attr.value) + .map_err(|e| OoxmlError::Xml(e.to_string()))?; + return Ok(name.to_string()); } } }, @@ -68,12 +66,11 @@ impl<'a> SlidePart<'a> { loop { match reader.read_event() { - Ok(Event::Start(e)) => { + Ok(Event::Start(e)) // Check if this is an a:t element (DrawingML text) - if e.local_name().as_ref() == b"t" { + if e.local_name().as_ref() == b"t" => { in_text_element = true; - } - }, + }, Ok(Event::Text(e)) if in_text_element => { // Extract text content let t = std::str::from_utf8(e.as_ref()) @@ -83,11 +80,10 @@ impl<'a> SlidePart<'a> { } text.push_str(t); }, - Ok(Event::End(e)) => { - if e.local_name().as_ref() == b"t" { + Ok(Event::End(e)) + if e.local_name().as_ref() == b"t" => { in_text_element = false; - } - }, + }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), _ => {}, @@ -256,14 +252,12 @@ impl<'a> SlideLayoutPart<'a> { loop { match reader.read_event() { - Ok(Event::Start(e)) | Ok(Event::Empty(e)) => { - if e.local_name().as_ref() 
== b"cSld" { - for attr in e.attributes().flatten() { - if attr.key.as_ref() == b"name" { - let name = std::str::from_utf8(&attr.value) - .map_err(|e| OoxmlError::Xml(e.to_string()))?; - return Ok(name.to_string()); - } + Ok(Event::Start(e)) | Ok(Event::Empty(e)) if e.local_name().as_ref() == b"cSld" => { + for attr in e.attributes().flatten() { + if attr.key.as_ref() == b"name" { + let name = std::str::from_utf8(&attr.value) + .map_err(|e| OoxmlError::Xml(e.to_string()))?; + return Ok(name.to_string()); } } }, @@ -310,14 +304,12 @@ impl<'a> SlideMasterPart<'a> { loop { match reader.read_event() { - Ok(Event::Start(e)) | Ok(Event::Empty(e)) => { - if e.local_name().as_ref() == b"cSld" { - for attr in e.attributes().flatten() { - if attr.key.as_ref() == b"name" { - let name = std::str::from_utf8(&attr.value) - .map_err(|e| OoxmlError::Xml(e.to_string()))?; - return Ok(name.to_string()); - } + Ok(Event::Start(e)) | Ok(Event::Empty(e)) if e.local_name().as_ref() == b"cSld" => { + for attr in e.attributes().flatten() { + if attr.key.as_ref() == b"name" { + let name = std::str::from_utf8(&attr.value) + .map_err(|e| OoxmlError::Xml(e.to_string()))?; + return Ok(name.to_string()); } } }, @@ -339,24 +331,23 @@ impl<'a> SlideMasterPart<'a> { loop { match reader.read_event() { - Ok(Event::Start(e)) | Ok(Event::Empty(e)) => { - if e.local_name().as_ref() == b"sldLayoutId" { - for attr in e.attributes().flatten() { - // Look for r:id attribute (can be r:id or just id with relationships namespace) - let key = attr.key.as_ref(); - // Check if this is the relationship ID attribute - if key == b"r:id" - || (key.starts_with(b"r:") - && attr.key.local_name().as_ref() == b"id") - || attr.key.local_name().as_ref() == b"id" - { - let rid = std::str::from_utf8(&attr.value) - .map_err(|e| OoxmlError::Xml(e.to_string()))?; - // Only push if it looks like a relationship ID (starts with "rId") - if rid.starts_with("rId") { - rids.push(rid.to_string()); - break; - } + Ok(Event::Start(e)) | 
Ok(Event::Empty(e)) + if e.local_name().as_ref() == b"sldLayoutId" => + { + for attr in e.attributes().flatten() { + // Look for r:id attribute (can be r:id or just id with relationships namespace) + let key = attr.key.as_ref(); + // Check if this is the relationship ID attribute + if key == b"r:id" + || (key.starts_with(b"r:") && attr.key.local_name().as_ref() == b"id") + || attr.key.local_name().as_ref() == b"id" + { + let rid = std::str::from_utf8(&attr.value) + .map_err(|e| OoxmlError::Xml(e.to_string()))?; + // Only push if it looks like a relationship ID (starts with "rId") + if rid.starts_with("rId") { + rids.push(rid.to_string()); + break; } } } diff --git a/src/ooxml/pptx/presentation.rs b/src/ooxml/pptx/presentation.rs index f8699d9..4bbbef4 100644 --- a/src/ooxml/pptx/presentation.rs +++ b/src/ooxml/pptx/presentation.rs @@ -740,32 +740,30 @@ impl<'a> Presentation<'a> { loop { match reader.read_event() { - Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => { - if e.local_name().as_ref() == b"hlinkClick" { - let mut action = None; - let mut tooltip = None; - - for attr in e.attributes().flatten() { - match attr.key.as_ref() { - b"action" => { - action = std::str::from_utf8(&attr.value) - .ok() - .map(|s| s.to_string()); - }, - b"tooltip" => { - tooltip = std::str::from_utf8(&attr.value) - .ok() - .map(|s| s.to_string()); - }, - _ => {}, - } + Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) + if e.local_name().as_ref() == b"hlinkClick" => + { + let mut action = None; + let mut tooltip = None; + + for attr in e.attributes().flatten() { + match attr.key.as_ref() { + b"action" => { + action = + std::str::from_utf8(&attr.value).ok().map(|s| s.to_string()); + }, + b"tooltip" => { + tooltip = + std::str::from_utf8(&attr.value).ok().map(|s| s.to_string()); + }, + _ => {}, } + } - if let Some(action_str) = action - && let Ok(hyperlink) = Hyperlink::from_xml(&action_str, tooltip) - { - hyperlinks.push(hyperlink); - } + if let Some(action_str) = action + && let 
Ok(hyperlink) = Hyperlink::from_xml(&action_str, tooltip) + { + hyperlinks.push(hyperlink); } }, Ok(Event::Eof) => break, diff --git a/src/ooxml/pptx/protection.rs b/src/ooxml/pptx/protection.rs index ccf567a..6531ee7 100644 --- a/src/ooxml/pptx/protection.rs +++ b/src/ooxml/pptx/protection.rs @@ -248,50 +248,49 @@ impl PresentationProtection { loop { match reader.read_event() { - Ok(Event::Empty(e)) | Ok(Event::Start(e)) => { - if e.local_name().as_ref() == b"modifyVerifier" { - protection.modify_password_protected = true; - for attr in e.attributes().flatten() { - match attr.key.as_ref() { - // ISO-style attributes - b"hashValue" | b"hashData" => { - protection.modify_password_hash = Some( - std::str::from_utf8(&attr.value).unwrap_or("").to_string(), - ); - }, - b"saltValue" | b"saltData" => { - protection.modify_password_salt = Some( - std::str::from_utf8(&attr.value).unwrap_or("").to_string(), - ); - }, - b"spinCount" | b"spinValue" => { - protection.modify_spin_count = std::str::from_utf8(&attr.value) - .ok() - .and_then(|s| s.parse().ok()) - .unwrap_or(100000); - }, - b"algorithmName" | b"algIdExt" => { - if let Ok(uri) = std::str::from_utf8(&attr.value) { - protection.modify_algorithm = - CryptoAlgorithm::from_uri(uri); - } - }, - // Legacy SID-based form - b"cryptAlgorithmSid" => { - if let Ok(text) = std::str::from_utf8(&attr.value) - && let Ok(sid) = text.parse::() - { - protection.modify_algorithm = match sid { - 4 => CryptoAlgorithm::Sha1, - 12 => CryptoAlgorithm::Sha256, - 13 => CryptoAlgorithm::Sha384, - 14 => CryptoAlgorithm::Sha512, - _ => protection.modify_algorithm, - }; - } - }, - _ => {}, - } + Ok(Event::Empty(e)) | Ok(Event::Start(e)) + if e.local_name().as_ref() == b"modifyVerifier" => + { + protection.modify_password_protected = true; + for attr in e.attributes().flatten() { + match attr.key.as_ref() { + // ISO-style attributes + b"hashValue" | b"hashData" => { + protection.modify_password_hash = Some( + 
std::str::from_utf8(&attr.value).unwrap_or("").to_string(), + ); + }, + b"saltValue" | b"saltData" => { + protection.modify_password_salt = Some( + std::str::from_utf8(&attr.value).unwrap_or("").to_string(), + ); + }, + b"spinCount" | b"spinValue" => { + protection.modify_spin_count = std::str::from_utf8(&attr.value) + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(100000); + }, + b"algorithmName" | b"algIdExt" => { + if let Ok(uri) = std::str::from_utf8(&attr.value) { + protection.modify_algorithm = CryptoAlgorithm::from_uri(uri); + } + }, + // Legacy SID-based form + b"cryptAlgorithmSid" => { + if let Ok(text) = std::str::from_utf8(&attr.value) + && let Ok(sid) = text.parse::() + { + protection.modify_algorithm = match sid { + 4 => CryptoAlgorithm::Sha1, + 12 => CryptoAlgorithm::Sha256, + 13 => CryptoAlgorithm::Sha384, + 14 => CryptoAlgorithm::Sha512, + _ => protection.modify_algorithm, + }; + } + }, + _ => {}, } } }, diff --git a/src/ooxml/pptx/shapes/base.rs b/src/ooxml/pptx/shapes/base.rs index f9e3930..556b7e1 100644 --- a/src/ooxml/pptx/shapes/base.rs +++ b/src/ooxml/pptx/shapes/base.rs @@ -92,15 +92,14 @@ impl BaseShape { loop { match reader.read_event() { - Ok(Event::Empty(e)) | Ok(Event::Start(e)) => { - if e.local_name().as_ref() == b"cNvPr" { - for attr in e.attributes().flatten() { - if attr.key.as_ref() == b"name" { - let name = - std::str::from_utf8(&attr.value).unwrap_or("").to_string(); - self.name = Some(name.clone()); - return Ok(name); - } + Ok(Event::Empty(e)) | Ok(Event::Start(e)) + if e.local_name().as_ref() == b"cNvPr" => + { + for attr in e.attributes().flatten() { + if attr.key.as_ref() == b"name" { + let name = std::str::from_utf8(&attr.value).unwrap_or("").to_string(); + self.name = Some(name.clone()); + return Ok(name); } } }, @@ -144,10 +143,8 @@ impl BaseShape { loop { match reader.read_event() { - Ok(Event::Empty(e)) | Ok(Event::Start(e)) => { - if e.local_name().as_ref() == b"ph" { - return true; - } + Ok(Event::Empty(e)) | 
Ok(Event::Start(e)) if e.local_name().as_ref() == b"ph" => { + return true; }, Ok(Event::Eof) => break, Err(_) => break, @@ -177,21 +174,17 @@ impl BaseShape { loop { match reader.read_event() { - Ok(Event::Empty(e)) | Ok(Event::Start(e)) => { - if e.local_name().as_ref() == b"ph" { - // Look for the type attribute - for attr in e.attributes().flatten() { - if attr.key.as_ref() == b"type" { - return std::str::from_utf8(&attr.value) - .map(|s| s.to_string()) - .map_err(|e| { - crate::ooxml::error::OoxmlError::Xml(e.to_string()) - }); - } + Ok(Event::Empty(e)) | Ok(Event::Start(e)) if e.local_name().as_ref() == b"ph" => { + // Look for the type attribute + for attr in e.attributes().flatten() { + if attr.key.as_ref() == b"type" { + return std::str::from_utf8(&attr.value) + .map(|s| s.to_string()) + .map_err(|e| crate::ooxml::error::OoxmlError::Xml(e.to_string())); } - // If no type attribute, it's usually a body placeholder - return Ok("body".to_string()); } + // If no type attribute, it's usually a body placeholder + return Ok("body".to_string()); }, Ok(Event::Eof) => break, Err(e) => return Err(crate::ooxml::error::OoxmlError::Xml(e.to_string())), diff --git a/src/ooxml/pptx/shapes/table.rs b/src/ooxml/pptx/shapes/table.rs index 3dd4c4a..22901c4 100644 --- a/src/ooxml/pptx/shapes/table.rs +++ b/src/ooxml/pptx/shapes/table.rs @@ -67,16 +67,14 @@ impl Table { table_xml.push(b'>'); } }, - Ok(Event::End(e)) => { - if in_table { - table_xml.extend_from_slice(b"'); + Ok(Event::End(e)) if in_table => { + table_xml.extend_from_slice(b"'); - depth -= 1; - if depth == 0 && e.local_name().as_ref() == b"tbl" { - return Ok(Table::new(table_xml)); - } + depth -= 1; + if depth == 0 && e.local_name().as_ref() == b"tbl" { + return Ok(Table::new(table_xml)); } }, Ok(Event::Text(e)) if in_table => { @@ -114,12 +112,11 @@ impl Table { loop { match reader.read_event() { - Ok(Event::Start(e)) => { + Ok(Event::Start(e)) // DrawingML table rows are - if e.local_name().as_ref() == b"tr" { 
+ if e.local_name().as_ref() == b"tr" => { count += 1; - } - }, + }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), _ => {}, @@ -173,17 +170,15 @@ impl Table { current_row_xml.push(b'>'); } }, - Ok(Event::End(e)) => { - if in_row { - current_row_xml.extend_from_slice(b"'); + Ok(Event::End(e)) if in_row => { + current_row_xml.extend_from_slice(b"'); - depth -= 1; - if depth == 0 && e.local_name().as_ref() == b"tr" { - rows.push(TableRow::new(current_row_xml.clone())); - in_row = false; - } + depth -= 1; + if depth == 0 && e.local_name().as_ref() == b"tr" { + rows.push(TableRow::new(current_row_xml.clone())); + in_row = false; } }, Ok(Event::Text(e)) if in_row => { @@ -246,12 +241,11 @@ impl TableRow { loop { match reader.read_event() { - Ok(Event::Start(e)) => { + Ok(Event::Start(e)) // DrawingML table cells are - if e.local_name().as_ref() == b"tc" { + if e.local_name().as_ref() == b"tc" => { count += 1; - } - }, + }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), _ => {}, @@ -293,17 +287,15 @@ impl TableRow { current_cell_xml.push(b'>'); } }, - Ok(Event::End(e)) => { - if in_cell { - current_cell_xml.extend_from_slice(b"'); + Ok(Event::End(e)) if in_cell => { + current_cell_xml.extend_from_slice(b"'); - depth -= 1; - if depth == 0 && e.local_name().as_ref() == b"tc" { - cells.push(TableCell::new(current_cell_xml.clone())); - in_cell = false; - } + depth -= 1; + if depth == 0 && e.local_name().as_ref() == b"tc" { + cells.push(TableCell::new(current_cell_xml.clone())); + in_cell = false; } }, Ok(Event::Text(e)) if in_cell => { @@ -354,10 +346,8 @@ impl TableCell { loop { match reader.read_event() { - Ok(Event::Start(e)) => { - if e.local_name().as_ref() == b"t" { - in_text_element = true; - } + Ok(Event::Start(e)) if e.local_name().as_ref() == b"t" => { + in_text_element = true; }, Ok(Event::Text(e)) if in_text_element => { let t = std::str::from_utf8(e.as_ref()) @@ -367,10 +357,8 @@ impl TableCell { } 
text.push_str(t); }, - Ok(Event::End(e)) => { - if e.local_name().as_ref() == b"t" { - in_text_element = false; - } + Ok(Event::End(e)) if e.local_name().as_ref() == b"t" => { + in_text_element = false; }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), diff --git a/src/ooxml/pptx/shapes/textframe.rs b/src/ooxml/pptx/shapes/textframe.rs index 31b3482..811b611 100644 --- a/src/ooxml/pptx/shapes/textframe.rs +++ b/src/ooxml/pptx/shapes/textframe.rs @@ -49,12 +49,11 @@ impl TextFrame { loop { match reader.read_event() { - Ok(Event::Start(e)) => { + Ok(Event::Start(e)) // Check if this is an a:t element (DrawingML text) - if e.local_name().as_ref() == b"t" { + if e.local_name().as_ref() == b"t" => { in_text_element = true; - } - }, + }, Ok(Event::Text(e)) if in_text_element => { // Extract text content let t = std::str::from_utf8(e.as_ref()) @@ -64,11 +63,10 @@ impl TextFrame { } text.push_str(t); }, - Ok(Event::End(e)) => { - if e.local_name().as_ref() == b"t" { + Ok(Event::End(e)) + if e.local_name().as_ref() == b"t" => { in_text_element = false; - } - }, + }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), _ => {}, @@ -113,17 +111,15 @@ impl TextFrame { current_para_xml.push(b'>'); } }, - Ok(Event::End(e)) => { - if in_para { - current_para_xml.extend_from_slice(b"'); + Ok(Event::End(e)) if in_para => { + current_para_xml.extend_from_slice(b"'); - depth -= 1; - if depth == 0 && e.local_name().as_ref() == b"p" { - paragraphs.push(Paragraph::new(current_para_xml.clone())); - in_para = false; - } + depth -= 1; + if depth == 0 && e.local_name().as_ref() == b"p" { + paragraphs.push(Paragraph::new(current_para_xml.clone())); + in_para = false; } }, Ok(Event::Text(e)) if in_para => { @@ -195,20 +191,16 @@ impl Paragraph { loop { match reader.read_event() { - Ok(Event::Start(e)) => { - if e.local_name().as_ref() == b"t" { - in_text_element = true; - } + Ok(Event::Start(e)) if e.local_name().as_ref() == b"t" => { 
+ in_text_element = true; }, Ok(Event::Text(e)) if in_text_element => { let t = std::str::from_utf8(e.as_ref()) .map_err(|e| OoxmlError::Xml(e.to_string()))?; text.push_str(t); }, - Ok(Event::End(e)) => { - if e.local_name().as_ref() == b"t" { - in_text_element = false; - } + Ok(Event::End(e)) if e.local_name().as_ref() == b"t" => { + in_text_element = false; }, Ok(Event::Eof) => break, Err(e) => return Err(OoxmlError::Xml(e.to_string())), diff --git a/src/ooxml/pptx/slide.rs b/src/ooxml/pptx/slide.rs index 1e43c68..1fdf0bf 100644 --- a/src/ooxml/pptx/slide.rs +++ b/src/ooxml/pptx/slide.rs @@ -462,10 +462,8 @@ impl<'a> Slide<'a> { loop { match reader.read_event() { - Ok(Event::Start(e)) => { - if e.local_name().as_ref() == b"t" { - in_text_element = true; - } + Ok(Event::Start(e)) if e.local_name().as_ref() == b"t" => { + in_text_element = true; }, Ok(Event::Text(e)) if in_text_element => { let t = std::str::from_utf8(e.as_ref()) @@ -475,10 +473,8 @@ impl<'a> Slide<'a> { } text.push_str(t); }, - Ok(Event::End(e)) => { - if e.local_name().as_ref() == b"t" { - in_text_element = false; - } + Ok(Event::End(e)) if e.local_name().as_ref() == b"t" => { + in_text_element = false; }, Ok(Event::Eof) => break, Err(e) => return Err(crate::ooxml::error::OoxmlError::Xml(e.to_string())), diff --git a/src/ooxml/xlsb/cells_reader.rs b/src/ooxml/xlsb/cells_reader.rs index d6aaf63..6040a72 100644 --- a/src/ooxml/xlsb/cells_reader.rs +++ b/src/ooxml/xlsb/cells_reader.rs @@ -105,25 +105,23 @@ where // BrtRowHdr self.current_row = binary::read_u32_le_at(&self.buf, 0)?; }, - 0x0001 => { + 0x0001 // BrtCellBlank - if self.buf.len() >= 4 { + if self.buf.len() >= 4 => { let col = binary::read_u32_le_at(&self.buf, 0)?; return Ok(Some(XlsbCell::new(self.current_row, col, CellValue::Empty))); - } - }, - 0x0002 => { + }, + 0x0002 // BrtCellRk - if self.buf.len() >= 12 { + if self.buf.len() >= 12 => { let col = binary::read_u32_le_at(&self.buf, 0)?; let rk_val = binary::read_u32_le_at(&self.buf, 
8)?; let value = Self::parse_rk_value(rk_val); return Ok(Some(XlsbCell::new(self.current_row, col, value))); - } - }, - 0x0003 => { + }, + 0x0003 // BrtCellError - if self.buf.len() >= 9 { + if self.buf.len() >= 9 => { let col = binary::read_u32_le_at(&self.buf, 0)?; let error_code = self.buf[8]; let error_msg = match error_code { @@ -142,11 +140,10 @@ where col, CellValue::Error(error_msg.to_string()), ))); - } - }, - 0x0004 => { + }, + 0x0004 // BrtCellBool - if self.buf.len() >= 9 { + if self.buf.len() >= 9 => { let col = binary::read_u32_le_at(&self.buf, 0)?; let value = self.buf[8] != 0; return Ok(Some(XlsbCell::new( @@ -154,11 +151,10 @@ where col, CellValue::Bool(value), ))); - } - }, - 0x0005 => { + }, + 0x0005 // BrtCellReal - if self.buf.len() >= 16 { + if self.buf.len() >= 16 => { let col = binary::read_u32_le_at(&self.buf, 0)?; let value = binary::read_f64_le_at(&self.buf, 8)?; return Ok(Some(XlsbCell::new( @@ -166,11 +162,10 @@ where col, CellValue::Float(value), ))); - } - }, - 0x0006 => { + }, + 0x0006 // BrtCellSt - if self.buf.len() >= 8 { + if self.buf.len() >= 8 => { let col = binary::read_u32_le_at(&self.buf, 0)?; let (string, _) = super::records::wide_str_with_len(&self.buf[8..])?; return Ok(Some(XlsbCell::new( @@ -178,11 +173,10 @@ where col, CellValue::String(string), ))); - } - }, - 0x0007 => { + }, + 0x0007 // BrtCellIsst - if self.buf.len() >= 12 { + if self.buf.len() >= 12 => { let col = binary::read_u32_le_at(&self.buf, 0)?; let idx = binary::read_u32_le_at(&self.buf, 8)? 
as usize; let value = if idx < self.shared_strings.len() { @@ -191,11 +185,10 @@ where CellValue::Error("Invalid SST index".to_string()) }; return Ok(Some(XlsbCell::new(self.current_row, col, value))); - } - }, - 0x0008 => { + }, + 0x0008 // BrtFmlaString - formula with string result - if self.buf.len() >= 10 { + if self.buf.len() >= 10 => { let col = binary::read_u32_le_at(&self.buf, 0)?; // Skip style (4 bytes) + flags (1 byte) + formula length (4 bytes) let formula_len = binary::read_u32_le_at(&self.buf, 6)? as usize; @@ -209,11 +202,10 @@ where CellValue::String(string), ))); } - } - }, - 0x0009 => { + }, + 0x0009 // BrtFmlaNum - formula with numeric result - if self.buf.len() >= 18 { + if self.buf.len() >= 18 => { let col = binary::read_u32_le_at(&self.buf, 0)?; let formula_len = binary::read_u32_le_at(&self.buf, 6)? as usize; if self.buf.len() >= 10 + formula_len + 8 { @@ -224,11 +216,10 @@ where CellValue::Float(num_value), ))); } - } - }, - 0x000A => { + }, + 0x000A // BrtFmlaBool - formula with boolean result - if self.buf.len() >= 11 { + if self.buf.len() >= 11 => { let col = binary::read_u32_le_at(&self.buf, 0)?; let formula_len = binary::read_u32_le_at(&self.buf, 6)? as usize; if self.buf.len() > 10 + formula_len { @@ -239,11 +230,10 @@ where CellValue::Bool(bool_value), ))); } - } - }, - 0x000B => { + }, + 0x000B // BrtFmlaError - formula with error result - if self.buf.len() >= 11 { + if self.buf.len() >= 11 => { let col = binary::read_u32_le_at(&self.buf, 0)?; let formula_len = binary::read_u32_le_at(&self.buf, 6)? 
as usize; if self.buf.len() > 10 + formula_len { @@ -265,8 +255,7 @@ where CellValue::Error(error_msg.to_string()), ))); } - } - }, + }, _ => { // Skip unknown records }, diff --git a/src/ooxml/xlsb/named_ranges.rs b/src/ooxml/xlsb/named_ranges.rs index 5c2fa6f..c6fc88a 100644 --- a/src/ooxml/xlsb/named_ranges.rs +++ b/src/ooxml/xlsb/named_ranges.rs @@ -126,6 +126,17 @@ impl NamedRange { } } +/// Create a 3D area formula token stream for a workbook-local sheet range. +pub fn create_area3d_formula( + sheet_id: u32, + first_row: u16, + last_row: u16, + first_col: u16, + last_col: u16, +) -> Vec { + NamedRange::create_area3d_formula(sheet_id, first_row, last_row, first_col, last_col) +} + #[cfg(test)] mod tests { use super::*; @@ -152,14 +163,3 @@ mod tests { assert_eq!(u16::from_le_bytes([formula[9], formula[10]]), 1); } } - -/// Create a 3D area formula token stream for a workbook-local sheet range. -pub fn create_area3d_formula( - sheet_id: u32, - first_row: u16, - last_row: u16, - first_col: u16, - last_col: u16, -) -> Vec { - NamedRange::create_area3d_formula(sheet_id, first_row, last_row, first_col, last_col) -} diff --git a/src/ooxml/xlsb/writer/shared_strings.rs b/src/ooxml/xlsb/writer/shared_strings.rs index 484f433..d2fe978 100644 --- a/src/ooxml/xlsb/writer/shared_strings.rs +++ b/src/ooxml/xlsb/writer/shared_strings.rs @@ -131,7 +131,7 @@ mod tests { #[test] fn test_add_multiple_strings() { let mut writer = MutableSharedStringsWriter::new(); - let strings = vec!["A", "B", "C", "D", "E"]; + let strings = ["A", "B", "C", "D", "E"]; for (i, s) in strings.iter().enumerate() { let idx = writer.add_string(s.to_string()); diff --git a/src/ooxml/xlsx/workbook.rs b/src/ooxml/xlsx/workbook.rs index cdfe38b..c13a9e4 100644 --- a/src/ooxml/xlsx/workbook.rs +++ b/src/ooxml/xlsx/workbook.rs @@ -368,15 +368,11 @@ impl Workbook { // row or column reference. 
let mut chars = range.chars().skip_while(|c| *c == '$'); match chars.next() { - Some(ch) if ch.is_ascii_digit() => { - if rows.is_none() { - rows = Some(range.to_string()); - } + Some(ch) if ch.is_ascii_digit() && rows.is_none() => { + rows = Some(range.to_string()); }, - Some(ch) if ch.is_ascii_alphabetic() => { - if cols.is_none() { - cols = Some(range.to_string()); - } + Some(ch) if ch.is_ascii_alphabetic() && cols.is_none() => { + cols = Some(range.to_string()); }, _ => {}, } diff --git a/src/ooxml/xlsx/worksheet.rs b/src/ooxml/xlsx/worksheet.rs index f185b73..cd8d8c0 100644 --- a/src/ooxml/xlsx/worksheet.rs +++ b/src/ooxml/xlsx/worksheet.rs @@ -2013,26 +2013,6 @@ impl<'a> WorksheetTrait for Worksheet<'a> { } } -#[cfg(test)] -mod tests { - use super::Worksheet; - - #[test] - fn extract_inline_string_single_t() { - let xml = r#"Hello"#; - let text = Worksheet::extract_inline_string_text(xml).unwrap(); - assert_eq!(text, "Hello"); - } - - #[test] - fn extract_inline_string_multiple_runs() { - let xml = - r#"Hello World"#; - let text = Worksheet::extract_inline_string_text(xml).unwrap(); - assert_eq!(text, "Hello World"); - } -} - /// Iterator over worksheets in a workbook pub struct WorksheetIterator<'a> { worksheets: Vec, @@ -2071,3 +2051,23 @@ impl<'a> crate::sheet::WorksheetIterator<'a> for WorksheetIterator<'a> { // Import Workbook from the workbook module use super::workbook::Workbook; + +#[cfg(test)] +mod tests { + use super::Worksheet; + + #[test] + fn extract_inline_string_single_t() { + let xml = r#"Hello"#; + let text = Worksheet::extract_inline_string_text(xml).unwrap(); + assert_eq!(text, "Hello"); + } + + #[test] + fn extract_inline_string_multiple_runs() { + let xml = + r#"Hello World"#; + let text = Worksheet::extract_inline_string_text(xml).unwrap(); + assert_eq!(text, "Hello World"); + } +} diff --git a/src/rtf/types.rs b/src/rtf/types.rs index 06eb7b1..a851c31 100644 --- a/src/rtf/types.rs +++ b/src/rtf/types.rs @@ -495,7 +495,7 @@ mod tests { 
#[test] fn test_color_clone() { let color = Color::new(100, 150, 200); - let cloned = color.clone(); + let cloned = color; assert_eq!(cloned.red, color.red); assert_eq!(cloned.green, color.green); assert_eq!(cloned.blue, color.blue); @@ -737,7 +737,7 @@ mod tests { fn test_paragraph_content_text() { let fmt = Formatting::default(); let runs = vec![ - Run::new(Cow::Borrowed("Hello "), fmt.clone()), + Run::new(Cow::Borrowed("Hello "), fmt), Run::new(Cow::Borrowed("World"), fmt), ]; let content = ParagraphContent::new(Paragraph::default(), runs); diff --git a/src/sheet/eval/engine/math/random.rs b/src/sheet/eval/engine/math/random.rs index 3312cc7..77ce4af 100644 --- a/src/sheet/eval/engine/math/random.rs +++ b/src/sheet/eval/engine/math/random.rs @@ -81,7 +81,7 @@ mod tests { match result { CellValue::Float(v) => { // RAND should return a value between 0 and 1 - assert!(v >= 0.0 && v < 1.0); + assert!((0.0..1.0).contains(&v)); }, _ => panic!("Expected Float"), } @@ -108,7 +108,7 @@ mod tests { match result { CellValue::Int(v) => { // RANDBETWEEN should return an integer in the range [1, 10] - assert!(v >= 1 && v <= 10); + assert!((1..=10).contains(&v)); }, _ => panic!("Expected Int"), } @@ -135,7 +135,7 @@ mod tests { let result = eval_randbetween(ctx, "Sheet1", &args).await.unwrap(); match result { CellValue::Int(v) => { - assert!(v >= -10 && v <= -1); + assert!((-10..=-1).contains(&v)); }, _ => panic!("Expected Int"), } @@ -149,7 +149,7 @@ mod tests { let result = eval_randbetween(ctx, "Sheet1", &args).await.unwrap(); match result { CellValue::Int(v) => { - assert!(v >= -5 && v <= 5); + assert!((-5..=5).contains(&v)); }, _ => panic!("Expected Int"), } diff --git a/src/sheet/eval/engine/statistical/distributions.rs b/src/sheet/eval/engine/statistical/distributions.rs index 01b39e0..cb98991 100644 --- a/src/sheet/eval/engine/statistical/distributions.rs +++ b/src/sheet/eval/engine/statistical/distributions.rs @@ -3003,7 +3003,7 @@ mod tests { let args = vec![range1, 
range2, int_expr(2), int_expr(1)]; let result = eval_t_test(ctx, "Sheet1", &args).await.unwrap(); match result { - CellValue::Float(v) => assert!(v >= 0.0 && v <= 1.0), + CellValue::Float(v) => assert!((0.0..=1.0).contains(&v)), _ => panic!("Expected Float, got {:?}", result), } } @@ -3039,7 +3039,7 @@ mod tests { let args = vec![num_expr(6.0), num_expr(0.5), num_expr(0.75)]; let result = eval_binom_inv(ctx, "Sheet1", &args).await.unwrap(); match result { - CellValue::Int(v) => assert!(v >= 0 && v <= 6), + CellValue::Int(v) => assert!((0..=6).contains(&v)), _ => panic!("Expected Int, got {:?}", result), } } diff --git a/src/sheet/workbook_types.rs b/src/sheet/workbook_types.rs index 5bfc036..780790c 100644 --- a/src/sheet/workbook_types.rs +++ b/src/sheet/workbook_types.rs @@ -217,7 +217,7 @@ mod tests { #[test] fn test_workbook_format_clone() { let format = WorkbookFormat::Xls; - let cloned = format.clone(); + let cloned = format; assert_eq!(format, cloned); } From ab83334a2bdc6f85423d222711b039352ea96cd4 Mon Sep 17 00:00:00 2001 From: Ryker Zhu Date: Wed, 29 Apr 2026 03:47:53 +0800 Subject: [PATCH 02/25] Pre-refactor hotfix: fix breakage exposed by workspace inclusion Three pre-existing breakages on `main` only manifest when the crates are built together as a Cargo workspace with --all-targets. Fixing them now unblocks the upcoming workspace-split refactor. - soapberry-zip: rewrite doc-comment `rawzip::` -> `soapberry_zip::` in src/ to match the published crate name (litchi imports `soapberry_zip::`). - soapberry-zip: add `jiff = "0.2"` to [dev-dependencies] (used by `mod property_tests` in src/time.rs but never declared, latent because `cargo check` without `--all-targets` skipped it). - pyo3-litchi: widen `boxed_err_to_py_err` to accept `Box`, matching the actual error type returned by the litchi sheet API. 
--- pyo3-litchi/src/common.rs | 2 +- soapberry-zip/Cargo.toml | 1 + soapberry-zip/src/archive.rs | 20 ++++++++++---------- soapberry-zip/src/locator.rs | 14 +++++++------- soapberry-zip/src/path.rs | 2 +- soapberry-zip/src/reader_at.rs | 2 +- soapberry-zip/src/time.rs | 6 +++--- soapberry-zip/src/writer.rs | 24 ++++++++++++------------ 8 files changed, 36 insertions(+), 35 deletions(-) diff --git a/pyo3-litchi/src/common.rs b/pyo3-litchi/src/common.rs index 9e9f8e7..4d23234 100644 --- a/pyo3-litchi/src/common.rs +++ b/pyo3-litchi/src/common.rs @@ -26,7 +26,7 @@ pub fn to_py_err(err: litchi::Error) -> PyErr { } /// Converts a boxed error to a Python exception -pub fn boxed_err_to_py_err(err: Box) -> PyErr { +pub fn boxed_err_to_py_err(err: Box) -> PyErr { PyException::new_err(err.to_string()) } diff --git a/soapberry-zip/Cargo.toml b/soapberry-zip/Cargo.toml index 875d366..104e5db 100644 --- a/soapberry-zip/Cargo.toml +++ b/soapberry-zip/Cargo.toml @@ -18,6 +18,7 @@ flate2 = { version = "1", features = ["zlib-rs"], default-features = false } rayon = "1" [dev-dependencies] +jiff = "0.2" paste = "1.0" quickcheck = "1.0.3" quickcheck_macros = "1.1.0" diff --git a/soapberry-zip/src/archive.rs b/soapberry-zip/src/archive.rs index 8c4d94e..b7f9c3a 100644 --- a/soapberry-zip/src/archive.rs +++ b/soapberry-zip/src/archive.rs @@ -32,7 +32,7 @@ pub const RECOMMENDED_BUFFER_SIZE: usize = 1 << 16; /// # Examples /// /// ```rust -/// use rawzip::{ZipArchive, ZipSliceArchive, Error}; +/// use soapberry_zip::{ZipArchive, ZipSliceArchive, Error}; /// /// fn process_zip_slice(data: &[u8]) -> Result<(), Error> { /// let archive = ZipArchive::from_slice(data)?; @@ -349,7 +349,7 @@ impl<'data> Iterator for ZipSliceEntries<'data> { /// Creating from a file: /// /// ```rust -/// # use rawzip::{ZipArchive, Error, RECOMMENDED_BUFFER_SIZE}; +/// # use soapberry_zip::{ZipArchive, Error, RECOMMENDED_BUFFER_SIZE}; /// # use std::fs::File; /// # use std::io; /// fn example_from_file(file: File) 
-> Result<(), Error> { @@ -398,7 +398,7 @@ impl ZipArchive<()> { /// reader in a mutex to support positioned io. /// /// ```rust - /// # use rawzip::{ZipArchive, Error, RECOMMENDED_BUFFER_SIZE, ZipFileHeaderRecord}; + /// # use soapberry_zip::{ZipArchive, Error, RECOMMENDED_BUFFER_SIZE, ZipFileHeaderRecord}; /// # use std::io::Cursor; /// fn example(zip_data: &[u8]) -> Result<(), Error> { /// let mut buffer = vec![0u8; RECOMMENDED_BUFFER_SIZE]; @@ -443,7 +443,7 @@ impl ZipArchive { /// reader. /// /// ```rust - /// # use rawzip::{ZipArchive, Error, RECOMMENDED_BUFFER_SIZE, ZipFileHeaderRecord}; + /// # use soapberry_zip::{ZipArchive, Error, RECOMMENDED_BUFFER_SIZE, ZipFileHeaderRecord}; /// # use std::fs::File; /// fn example(file: File) -> Result<(), Error> { /// let mut buffer = vec![0u8; RECOMMENDED_BUFFER_SIZE]; @@ -488,7 +488,7 @@ impl ZipArchive { /// # Examples /// /// ```rust - /// use rawzip::{ZipArchive, ZipStr, RECOMMENDED_BUFFER_SIZE}; + /// use soapberry_zip::{ZipArchive, ZipStr, RECOMMENDED_BUFFER_SIZE}; /// use std::io::Read; /// use std::fs::File; /// @@ -521,7 +521,7 @@ impl ZipArchive { /// # Examples /// /// ```rust - /// # use rawzip::{ZipArchive, ZipLocator, RECOMMENDED_BUFFER_SIZE}; + /// # use soapberry_zip::{ZipArchive, ZipLocator, RECOMMENDED_BUFFER_SIZE}; /// # use std::fs::File; /// # fn example() -> Result<(), Box> { /// # let file = File::open("assets/test.zip")?; @@ -660,7 +660,7 @@ where /// data: /// /// ```rust - /// # use rawzip::{ZipArchive, Error}; + /// # use soapberry_zip::{ZipArchive, Error}; /// # fn example(data: &[u8]) -> Result<(), Error> { /// let archive = ZipArchive::from_slice(data)?; /// let mut ranges = Vec::new(); @@ -706,7 +706,7 @@ where /// # Examples /// /// ```rust - /// # use rawzip::{ZipArchive, RECOMMENDED_BUFFER_SIZE, extra_fields::ExtraFieldId}; + /// # use soapberry_zip::{ZipArchive, RECOMMENDED_BUFFER_SIZE, extra_fields::ExtraFieldId}; /// # use std::fs::File; /// # fn example() -> Result<(), Box> { /// 
// Test with filename mismatch test fixture @@ -1547,7 +1547,7 @@ impl<'a> ZipFileHeaderRecord<'a> { /// /// # Example /// ```rust - /// # use rawzip::ZipArchive; + /// # use soapberry_zip::ZipArchive; /// # fn example() -> Result<(), Box> { /// # let data = include_bytes!("../assets/test.zip"); /// # let archive = ZipArchive::from_slice(data)?; @@ -1629,7 +1629,7 @@ impl<'a> ZipFileHeaderRecord<'a> { /// # Examples /// /// ```rust - /// # use rawzip::{ZipArchive, extra_fields::ExtraFieldId}; + /// # use soapberry_zip::{ZipArchive, extra_fields::ExtraFieldId}; /// # fn example(data: &[u8]) -> Result<(), Box> { /// let archive = ZipArchive::from_slice(data)?; /// for entry_result in archive.entries() { diff --git a/soapberry-zip/src/locator.rs b/soapberry-zip/src/locator.rs index 700bfed..23d176d 100644 --- a/soapberry-zip/src/locator.rs +++ b/soapberry-zip/src/locator.rs @@ -50,7 +50,7 @@ impl ZipLocator { /// The search is performed backwards from the end of the data source. /// /// ```rust - /// use rawzip::ZipLocator; + /// use soapberry_zip::ZipLocator; /// /// let locator = ZipLocator::new().max_search_space(1024 * 64); // 64 KiB /// ``` @@ -136,7 +136,7 @@ impl ZipLocator { /// # Examples /// /// ```rust - /// use rawzip::ZipLocator; + /// use soapberry_zip::ZipLocator; /// use std::fs; /// use std::io::Read; /// @@ -179,12 +179,12 @@ impl ZipLocator { /// # Examples /// /// ```rust - /// use rawzip::ZipLocator; + /// use soapberry_zip::ZipLocator; /// use std::fs::File; /// /// # fn main() -> Result<(), Box> { /// let file = File::open("assets/readme.zip")?; - /// let mut buffer = vec![0; rawzip::RECOMMENDED_BUFFER_SIZE]; + /// let mut buffer = vec![0; soapberry_zip::RECOMMENDED_BUFFER_SIZE]; /// let locator = ZipLocator::new(); /// /// match locator.locate_in_file(file, &mut buffer) { @@ -229,14 +229,14 @@ impl ZipLocator { /// # Examples /// /// ```rust - /// use rawzip::{ZipLocator, FileReader}; + /// use soapberry_zip::{ZipLocator, FileReader}; /// use 
std::fs::File; /// use std::io::Seek; /// - /// # fn main() -> Result<(), rawzip::Error> { + /// # fn main() -> Result<(), soapberry_zip::Error> { /// let file = File::open("assets/test.zip").unwrap(); /// let mut reader = FileReader::from(file); - /// let mut buffer = vec![0; rawzip::RECOMMENDED_BUFFER_SIZE]; + /// let mut buffer = vec![0; soapberry_zip::RECOMMENDED_BUFFER_SIZE]; /// let locator = ZipLocator::new(); /// /// // An example of determining the end offset when you don't diff --git a/soapberry-zip/src/path.rs b/soapberry-zip/src/path.rs index abb090f..65727f3 100644 --- a/soapberry-zip/src/path.rs +++ b/soapberry-zip/src/path.rs @@ -43,7 +43,7 @@ //! ## Usage Examples //! //! ```rust -//! use rawzip::path::ZipFilePath; +//! use soapberry_zip::path::ZipFilePath; //! //! // From raw bytes //! let raw_path = ZipFilePath::from_bytes(b"../../../etc/passwd"); diff --git a/soapberry-zip/src/reader_at.rs b/soapberry-zip/src/reader_at.rs index c1d5a8c..fb76b06 100644 --- a/soapberry-zip/src/reader_at.rs +++ b/soapberry-zip/src/reader_at.rs @@ -272,7 +272,7 @@ impl ReaderAt for Box { /// /// ``` /// use std::io::Read; -/// use rawzip::{ZipArchive, RangeReader, RECOMMENDED_BUFFER_SIZE}; +/// use soapberry_zip::{ZipArchive, RangeReader, RECOMMENDED_BUFFER_SIZE}; /// use std::fs::File; /// /// let file = File::open("assets/test-prefix.zip")?; diff --git a/soapberry-zip/src/time.rs b/soapberry-zip/src/time.rs index 15b080b..a5cd358 100644 --- a/soapberry-zip/src/time.rs +++ b/soapberry-zip/src/time.rs @@ -15,8 +15,8 @@ //! preserving modification times: //! //! ``` -//! use rawzip::{ZipArchive, ZipArchiveWriter, ZipDataWriter}; -//! use rawzip::time::{ZipDateTimeKind, UtcDateTime}; +//! use soapberry_zip::{ZipArchive, ZipArchiveWriter, ZipDataWriter}; +//! use soapberry_zip::time::{ZipDateTimeKind, UtcDateTime}; //! use std::io::Write; //! //! 
// Read a test ZIP file with timestamps @@ -276,7 +276,7 @@ impl ZipDateTime { /// # Examples /// /// ``` - /// # use rawzip::time::{UtcDateTime, LocalDateTime}; + /// # use soapberry_zip::time::{UtcDateTime, LocalDateTime}; /// let utc_datetime = UtcDateTime::from_components( /// 2023, 6, 15, 14, 30, 45, 500_000_000 /// ).unwrap(); diff --git a/soapberry-zip/src/writer.rs b/soapberry-zip/src/writer.rs index d083376..f72fcd2 100644 --- a/soapberry-zip/src/writer.rs +++ b/soapberry-zip/src/writer.rs @@ -89,13 +89,13 @@ impl ZipArchiveWriterBuilder { /// let zip_start_offset = output.position(); /// /// // Create ZIP archive starting after the prefix data - /// let mut archive = rawzip::ZipArchiveWriter::builder() + /// let mut archive = soapberry_zip::ZipArchiveWriter::builder() /// .with_offset(zip_start_offset) // Tell the archive where it starts /// .build(&mut output); /// /// // Add files normally /// let mut file = archive.new_file("data.txt").create().unwrap(); - /// let mut writer = rawzip::ZipDataWriter::new(&mut file); + /// let mut writer = soapberry_zip::ZipDataWriter::new(&mut file); /// writer.write_all(b"File content").unwrap(); /// let (_, desc) = writer.finish().unwrap(); /// file.finish(desc).unwrap(); @@ -127,7 +127,7 @@ impl ZipArchiveWriterBuilder { /// use std::io::Write; /// /// let mut output = std::io::Cursor::new(Vec::new()); -/// let mut archive = rawzip::ZipArchiveWriter::new(&mut output); +/// let mut archive = soapberry_zip::ZipArchiveWriter::new(&mut output); /// let (mut entry, config) = archive.new_file("file.txt").start().unwrap(); /// let mut writer = config.wrap(&mut entry); /// writer.write_all(b"Hello, world!").unwrap(); @@ -141,7 +141,7 @@ impl ZipArchiveWriterBuilder { /// use std::io::Write; /// /// let mut output = std::io::Cursor::new(Vec::::new()); -/// let mut _archive = rawzip::ZipArchiveWriter::builder() +/// let mut _archive = soapberry_zip::ZipArchiveWriter::builder() /// .with_capacity(1000) // Optimize for 1000 
anticipated files /// .build(&mut output); /// // ... add files as usual @@ -184,7 +184,7 @@ impl ZipArchiveWriter { /// use std::io::Write; /// /// let mut output = std::io::Cursor::new(Vec::new()); - /// let mut archive = rawzip::ZipArchiveWriter::new(&mut output); + /// let mut archive = soapberry_zip::ZipArchiveWriter::new(&mut output); /// /// // 1. Get local header offset /// let local_header_offset = archive.stream_offset(); @@ -194,7 +194,7 @@ impl ZipArchiveWriter { /// let data_start_offset = file.stream_offset(); /// /// // Write some data - /// let mut writer = rawzip::ZipDataWriter::new(&mut file); + /// let mut writer = soapberry_zip::ZipDataWriter::new(&mut file); /// writer.write_all(b"Hello World").unwrap(); /// let (_, desc) = writer.finish().unwrap(); /// @@ -318,7 +318,7 @@ where /// /// ```rust /// # use std::io::{Cursor, Write}; - /// # use rawzip::{ZipArchive, ZipArchiveWriter, ZipDataWriter, extra_fields::ExtraFieldId, Header}; + /// # use soapberry_zip::{ZipArchive, ZipArchiveWriter, ZipDataWriter, extra_fields::ExtraFieldId, Header}; /// let mut output = Cursor::new(Vec::new()); /// let mut archive = ZipArchiveWriter::new(&mut output); /// @@ -432,7 +432,7 @@ where /// ``` /// # use std::io::Write; /// # let mut output = std::io::Cursor::new(Vec::new()); - /// # let mut archive = rawzip::ZipArchiveWriter::new(&mut output); + /// # let mut archive = soapberry_zip::ZipArchiveWriter::new(&mut output); /// let (mut entry, config) = archive.new_file("file.txt").start().unwrap(); /// let mut writer = config.wrap(&mut entry); /// writer.write_all(b"Hello").unwrap(); @@ -445,7 +445,7 @@ where /// ``` /// # use std::io::Write; /// # let mut output = std::io::Cursor::new(Vec::new()); - /// # let mut archive = rawzip::ZipArchiveWriter::new(&mut output); + /// # let mut archive = soapberry_zip::ZipArchiveWriter::new(&mut output); /// let (mut entry, config) = archive.new_file("file.txt").start().unwrap(); /// let encoder = 
flate2::write::DeflateEncoder::new(&mut entry, flate2::Compression::default()); /// let mut writer = config.wrap(encoder); @@ -656,7 +656,7 @@ where /// ```rust /// # use std::io::Cursor; /// # let mut output = Cursor::new(Vec::new()); - /// # let mut archive = rawzip::ZipArchiveWriter::new(&mut output); + /// # let mut archive = soapberry_zip::ZipArchiveWriter::new(&mut output); /// archive.new_dir("my-dir/") /// .unix_permissions(0o755) /// .create()?; @@ -733,9 +733,9 @@ where /// ```rust /// # use std::io::{Cursor, Write}; /// # let mut output = Cursor::new(Vec::new()); - /// # let mut archive = rawzip::ZipArchiveWriter::new(&mut output); + /// # let mut archive = soapberry_zip::ZipArchiveWriter::new(&mut output); /// let (mut entry, config) = archive.new_file("my-file") - /// .compression_method(rawzip::CompressionMethod::Deflate) + /// .compression_method(soapberry_zip::CompressionMethod::Deflate) /// .unix_permissions(0o644) /// .start()?; /// let mut writer = config.wrap(&mut entry); From eb2cfca371cbadd5692696f09c289ee4993c4546 Mon Sep 17 00:00:00 2001 From: Ryker Zhu Date: Wed, 29 Apr 2026 04:04:56 +0800 Subject: [PATCH 03/25] P0: introduce workspace skeleton with relocated sibling crates - Add [workspace], [workspace.package], [workspace.dependencies] to root. Resolver = 3. Members list explicit through P0-P5; switches to glob in P6. - Move soapberry-zip/, xml-minifier/, pyo3-litchi/ into crates/. - Repoint litchi's path deps in [dependencies] to crates/. - Repoint pyo3-litchi's litchi path dep from ".." to "../..". - Fix pyo3-litchi clippy debt exposed by workspace inclusion: silence arc_with_non_send_sync at three #[pyclass] Arc::new sites (PyO3 manages thread access via the GIL; Arc here is intra-thread refcounting only), and fix one useless_format in document.rs. Umbrella package `litchi` remains at repo root (src/lib.rs unchanged) until P6. 
--- Cargo.toml | 86 +- crates/pyo3-litchi/BUILD.md | 215 ++ crates/pyo3-litchi/Cargo.toml | 34 + crates/pyo3-litchi/MANIFEST.in | 6 + crates/pyo3-litchi/README.md | 270 +++ crates/pyo3-litchi/pyproject.toml | 41 + .../pyo3-litchi/python/litchi_py/__init__.pyi | 421 ++++ crates/pyo3-litchi/python/litchi_py/py.typed | 2 + crates/pyo3-litchi/src/common.rs | 261 +++ crates/pyo3-litchi/src/document.rs | 299 +++ crates/pyo3-litchi/src/lib.rs | 83 + crates/pyo3-litchi/src/presentation.rs | 131 ++ crates/pyo3-litchi/src/sheet.rs | 110 + crates/soapberry-zip/Cargo.toml | 25 + crates/soapberry-zip/README.md | 5 + .../assets/crc32-not-streamed.zip | Bin 0 -> 314 bytes .../assets/filename_mismatch_test.zip | Bin 0 -> 232 bytes .../assets/go-with-datadesc-sig.zip | Bin 0 -> 242 bytes .../soapberry-zip/assets/gophercolor16x16.png | Bin 0 -> 785 bytes crates/soapberry-zip/assets/omni-mini.ja | Bin 0 -> 1131 bytes .../assets/rawzip-benchmark-data.csv | 901 ++++++++ .../assets/rawzip-performance-comparison.png | Bin 0 -> 39564 bytes .../rawzip-write-performance-comparison.png | Bin 0 -> 43129 bytes crates/soapberry-zip/assets/readme.notzip | Bin 0 -> 1906 bytes crates/soapberry-zip/assets/readme.zip | Bin 0 -> 1886 bytes crates/soapberry-zip/assets/symlink.zip | Bin 0 -> 173 bytes crates/soapberry-zip/assets/time-7zip.zip | Bin 0 -> 150 bytes crates/soapberry-zip/assets/time-go.zip | Bin 0 -> 148 bytes crates/soapberry-zip/assets/time-infozip.zip | Bin 0 -> 166 bytes crates/soapberry-zip/assets/time-osx.zip | Bin 0 -> 142 bytes crates/soapberry-zip/assets/time-win7.zip | Bin 0 -> 114 bytes crates/soapberry-zip/assets/time-winrar.zip | Bin 0 -> 150 bytes crates/soapberry-zip/assets/time-winzip.zip | Bin 0 -> 150 bytes crates/soapberry-zip/assets/unix.zip | Bin 0 -> 620 bytes crates/soapberry-zip/assets/winxp.zip | Bin 0 -> 412 bytes crates/soapberry-zip/assets/zip64-2.zip | Bin 0 -> 266 bytes crates/soapberry-zip/assets/zip64.zip | Bin 0 -> 242 bytes 
crates/soapberry-zip/src/archive.rs | 1998 +++++++++++++++++ crates/soapberry-zip/src/crc.rs | 36 + crates/soapberry-zip/src/errors.rs | 171 ++ crates/soapberry-zip/src/extra_fields.rs | 454 ++++ crates/soapberry-zip/src/headers.rs | 118 + crates/soapberry-zip/src/lib.rs | 48 + crates/soapberry-zip/src/locator.rs | 960 ++++++++ crates/soapberry-zip/src/mode.rs | 95 + crates/soapberry-zip/src/office.rs | 641 ++++++ crates/soapberry-zip/src/path.rs | 475 ++++ crates/soapberry-zip/src/reader_at.rs | 533 +++++ crates/soapberry-zip/src/time.rs | 1232 ++++++++++ crates/soapberry-zip/src/utils.rs | 14 + crates/soapberry-zip/src/writer.rs | 1516 +++++++++++++ crates/xml-minifier/Cargo.toml | 11 + crates/xml-minifier/README.md | 147 ++ crates/xml-minifier/src/lib.rs | 1087 +++++++++ 54 files changed, 12424 insertions(+), 2 deletions(-) create mode 100644 crates/pyo3-litchi/BUILD.md create mode 100644 crates/pyo3-litchi/Cargo.toml create mode 100644 crates/pyo3-litchi/MANIFEST.in create mode 100644 crates/pyo3-litchi/README.md create mode 100644 crates/pyo3-litchi/pyproject.toml create mode 100644 crates/pyo3-litchi/python/litchi_py/__init__.pyi create mode 100644 crates/pyo3-litchi/python/litchi_py/py.typed create mode 100644 crates/pyo3-litchi/src/common.rs create mode 100644 crates/pyo3-litchi/src/document.rs create mode 100644 crates/pyo3-litchi/src/lib.rs create mode 100644 crates/pyo3-litchi/src/presentation.rs create mode 100644 crates/pyo3-litchi/src/sheet.rs create mode 100644 crates/soapberry-zip/Cargo.toml create mode 100644 crates/soapberry-zip/README.md create mode 100644 crates/soapberry-zip/assets/crc32-not-streamed.zip create mode 100644 crates/soapberry-zip/assets/filename_mismatch_test.zip create mode 100644 crates/soapberry-zip/assets/go-with-datadesc-sig.zip create mode 100644 crates/soapberry-zip/assets/gophercolor16x16.png create mode 100644 crates/soapberry-zip/assets/omni-mini.ja create mode 100644 crates/soapberry-zip/assets/rawzip-benchmark-data.csv 
create mode 100644 crates/soapberry-zip/assets/rawzip-performance-comparison.png create mode 100644 crates/soapberry-zip/assets/rawzip-write-performance-comparison.png create mode 100644 crates/soapberry-zip/assets/readme.notzip create mode 100644 crates/soapberry-zip/assets/readme.zip create mode 100644 crates/soapberry-zip/assets/symlink.zip create mode 100644 crates/soapberry-zip/assets/time-7zip.zip create mode 100644 crates/soapberry-zip/assets/time-go.zip create mode 100644 crates/soapberry-zip/assets/time-infozip.zip create mode 100644 crates/soapberry-zip/assets/time-osx.zip create mode 100644 crates/soapberry-zip/assets/time-win7.zip create mode 100644 crates/soapberry-zip/assets/time-winrar.zip create mode 100644 crates/soapberry-zip/assets/time-winzip.zip create mode 100644 crates/soapberry-zip/assets/unix.zip create mode 100644 crates/soapberry-zip/assets/winxp.zip create mode 100644 crates/soapberry-zip/assets/zip64-2.zip create mode 100644 crates/soapberry-zip/assets/zip64.zip create mode 100644 crates/soapberry-zip/src/archive.rs create mode 100644 crates/soapberry-zip/src/crc.rs create mode 100644 crates/soapberry-zip/src/errors.rs create mode 100644 crates/soapberry-zip/src/extra_fields.rs create mode 100644 crates/soapberry-zip/src/headers.rs create mode 100644 crates/soapberry-zip/src/lib.rs create mode 100644 crates/soapberry-zip/src/locator.rs create mode 100644 crates/soapberry-zip/src/mode.rs create mode 100644 crates/soapberry-zip/src/office.rs create mode 100644 crates/soapberry-zip/src/path.rs create mode 100644 crates/soapberry-zip/src/reader_at.rs create mode 100644 crates/soapberry-zip/src/time.rs create mode 100644 crates/soapberry-zip/src/utils.rs create mode 100644 crates/soapberry-zip/src/writer.rs create mode 100644 crates/xml-minifier/Cargo.toml create mode 100644 crates/xml-minifier/README.md create mode 100644 crates/xml-minifier/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index 82ee752..8d19037 100644 --- a/Cargo.toml +++ 
b/Cargo.toml @@ -1,3 +1,4 @@ + [package] name = "litchi" version = "0.0.1" @@ -41,6 +42,87 @@ exclude = [ ".git/*", ".gitignore", ] +[workspace] +resolver = "3" +members = ["crates/pyo3-litchi", "crates/soapberry-zip", "crates/xml-minifier"] + +[workspace.package] +version = "0.0.1" +edition = "2024" +authors = ["Ryker Zhu "] +license = "Apache-2.0" +repository = "https://github.com/DevExzh/litchi" +rust-version = "1.85" + +[workspace.dependencies] +aes = "0.8" +aho-corasick = "1.1" +allsorts = "0.16" +atoi_simd = "0.18" +base64 = "0.22" +bitflags = { version = "2.10", features = ["std", "serde"] } +bumpalo = { version = "3", features = ["collections"] } +bytes = { version = "1", features = ["serde"] } +cbc = { version = "0.1", features = ["alloc"] } +chrono = { version = "0", features = ["serde"] } +crc-fast = { version = "1.8", features = ["optimize_crc32_auto"] } +encoding_rs = "0.8" +fast-float2 = "0.2" +fixedbitset = "0.5" +flate2 = { version = "1", features = ["zlib-rs"], default-features = false } +font-kit = "0.14" +hmac = "0.13" +image = { version = "0.25", features = ["default-formats", "rayon"] } +itoa = "1.0" +litchi-cfb = { version = "0.0.1", path = "crates/litchi-cfb" } +# Path deps to internal crates. 
+litchi-core = { version = "0.0.1", path = "crates/litchi-core" } +litchi-eval = { version = "0.0.1", path = "crates/litchi-eval" } +litchi-fonts = { version = "0.0.1", path = "crates/litchi-fonts" } +litchi-formula = { version = "0.0.1", path = "crates/litchi-formula" } +litchi-imgconv = { version = "0.0.1", path = "crates/litchi-imgconv" } +litchi-iwa = { version = "0.0.1", path = "crates/litchi-iwa" } +litchi-markdown = { version = "0.0.1", path = "crates/litchi-markdown" } +litchi-odf = { version = "0.0.1", path = "crates/litchi-odf" } +litchi-ole = { version = "0.0.1", path = "crates/litchi-ole" } +litchi-ooxml = { version = "0.0.1", path = "crates/litchi-ooxml" } +litchi-opc = { version = "0.0.1", path = "crates/litchi-opc" } +litchi-rtf = { version = "0.0.1", path = "crates/litchi-rtf" } +memchr = "2.7" +num-complex = "0.4" +once_cell = "1" +parking_lot = { version = "0.12", features = ["hardware-lock-elision"] } +phf = { version = "0.13", features = ["macros"] } +plist = "1" +prost = { version = "0.14", features = ["derive"] } +prost-build = "0.14" +prost-types = "0.14" +quick-xml = "0.39" +rand = "0.10" +rayon = "1.11" +reqwest = { version = "0.13", features = ["json"] } +roaring = "0" +rowan = "0.16" +ryu = "1.0" +serde = { version = "1", features = ["derive"] } +serde-saphyr = "0" +sha1 = "0.11" +sha2 = "0.11" +smallvec = "1.15" +snap = "1" +soapberry-zip = { path = "crates/soapberry-zip" } +statrs = "0.18" +sxd-document = "0.3" +sxd-xpath = "0.4" + +# External deps used by ≥2 crates — declared once, inherited by members. +# VERSIONS PRESERVED VERBATIM FROM CURRENT root Cargo.toml. 
+thiserror = "2.0" +tokio = { version = "1", features = ["full"] } +urlencoding = "2.1" +xml-minifier = { path = "crates/xml-minifier" } +zerocopy = { version = "0.8", features = ["std"] } +zerocopy-derive = "0.8" [features] default = ["ole", "ooxml", "ooxml_encryption", "eval_engine"] @@ -124,14 +206,14 @@ sha1 = { version = "0.11", optional = true } # SHA-1 hashing for Standard 2007 a sha2 = "0.11" # SHA-512 hashing for OOXML password protection smallvec = "1.15" # Stack-allocated vectors for small collections to avoid heap allocations snap = { version = "1", optional = true } # Snappy compression used in iWork file formats (.pages, .numbers, .key) -soapberry-zip = { path = "soapberry-zip", optional = true } # High-performance ZIP archive handling for modern Office formats +soapberry-zip = { path = "crates/soapberry-zip", optional = true } # High-performance ZIP archive handling for modern Office formats statrs = { version = "0.18", optional = true } # Statistics library for statistical functions sxd-document = { version = "0.3", optional = true } sxd-xpath = { version = "0.4", optional = true } thiserror = "2.0" # Convenient derive macros for error types tokio = { version = "1", features = ["full"] } urlencoding = { version = "2.1", optional = true } -xml-minifier = { path = "xml-minifier" } +xml-minifier = { path = "crates/xml-minifier" } zerocopy = { version = "0.8", features = ["std"] } # Safe zero-cost type conversions between bytes and structured data zerocopy-derive = "0.8" # Derive macros for zerocopy traits diff --git a/crates/pyo3-litchi/BUILD.md b/crates/pyo3-litchi/BUILD.md new file mode 100644 index 0000000..85a948d --- /dev/null +++ b/crates/pyo3-litchi/BUILD.md @@ -0,0 +1,215 @@ +# Building litchi-py + +This document provides detailed instructions for building and developing litchi-py. + +## Prerequisites + +1. **Rust** (1.85 or later; the crate uses the 2024 edition) + ```bash + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh + ``` + +2. 
**Python** (3.8 or later) + ```bash + # On macOS with Homebrew + brew install python@3.11 + + # On Ubuntu/Debian + sudo apt install python3 python3-pip python3-venv + ``` + +3. **Maturin** (build tool for PyO3) + ```bash + pip install maturin + ``` + +## Development Build + +For development, use `maturin develop`, which builds and installs the package in the current Python environment: + +```bash +cd crates/pyo3-litchi + +# Development build (faster, includes debug symbols) +maturin develop + +# Release build (optimized, slower to compile) +maturin develop --release +``` + +After running this, you can import and use `litchi_py` in your Python code: + +```python +from litchi_py import Document +doc = Document.open("test.docx") +print(doc.text()) +``` + +## Building Wheels + +To build distributable wheel files: + +```bash +cd crates/pyo3-litchi + +# Build for the current platform +maturin build --release + +# The wheel will be in target/wheels/ +ls -lh target/wheels/ +``` + +Install the wheel: + +```bash +pip install target/wheels/litchi_py-*.whl +``` + +## Cross-Platform Builds + +### Using maturin with Docker + +Maturin can build wheels for multiple platforms using Docker: + +```bash +# Install Docker first, then: + +# Build for Linux (manylinux) +maturin build --release --manylinux 2014 + +# Build for multiple Python versions +maturin build --release --interpreter python3.8 python3.9 python3.10 python3.11 python3.12 +``` + +### Manual Cross-Compilation + +For cross-compilation, you'll need to set up Rust targets: + +```bash +# For macOS (Intel) +rustup target add x86_64-apple-darwin + +# For macOS (Apple Silicon) +rustup target add aarch64-apple-darwin + +# For Windows +rustup target add x86_64-pc-windows-msvc + +# For Linux +rustup target add x86_64-unknown-linux-gnu +``` + +Then build with the target: + +```bash +maturin build --release --target x86_64-apple-darwin +``` + +## Testing + +### Running Examples + +```bash +cd crates/pyo3-litchi + +# Make sure the package is installed 
+maturin develop --release + +# Run examples +python examples/document_example.py +python examples/presentation_example.py +python examples/workbook_example.py +python examples/format_detection.py +``` + +### Running Tests + +Install pytest if you do not already have it, then run the tests: + +```bash +pip install pytest + +# Create a tests directory with your test files +mkdir -p tests +# ... add test files ... + +pytest tests/ +``` + +## Performance Profiling + +To profile the Rust code: + +```bash +# Build with profiling symbols +RUSTFLAGS="-C force-frame-pointers=yes" maturin develop --release + +# Use your preferred profiler (e.g., py-spy for Python, perf for Linux) +pip install py-spy +py-spy record -o profile.svg -- python your_script.py +``` + +## Troubleshooting + +### "No module named 'litchi_py'" + +Make sure you've run `maturin develop` in the correct directory and your Python environment is activated. + +### Compilation Errors + +1. **Missing Rust**: Install Rust from https://rustup.rs +2. **Outdated Rust**: Run `rustup update` +3. **Missing dependencies**: Make sure the parent `litchi` library compiles successfully + +### Linker Errors on macOS + +If you get linker errors related to C++ on macOS, install Xcode Command Line Tools: + +```bash +xcode-select --install +``` + +### Permission Errors + +On Linux, if you get permission errors when installing: + +```bash +# Use a virtual environment (recommended) +python3 -m venv venv +source venv/bin/activate +pip install maturin +maturin develop --release +``` + +## Publishing to PyPI + +To publish to PyPI (for maintainers): + +```bash +# Build wheels for all platforms +maturin build --release --manylinux 2014 + +# Upload to PyPI +maturin publish --username __token__ --password $PYPI_TOKEN +``` + +## IDE Setup + +### VS Code + +Install these extensions: +- Python (ms-python.python) +- rust-analyzer (rust-lang.rust-analyzer) +- Pylance (ms-python.vscode-pylance) for type checking + +### PyCharm + +1. 
Enable type checking: Settings → Editor → Inspections → Python → Type Checker +2. Mark `python/` as a Sources Root + +## Additional Resources + +- [PyO3 User Guide](https://pyo3.rs/) +- [Maturin Documentation](https://github.com/PyO3/maturin) +- [Rust Documentation](https://doc.rust-lang.org/) + diff --git a/crates/pyo3-litchi/Cargo.toml b/crates/pyo3-litchi/Cargo.toml new file mode 100644 index 0000000..9f4495b --- /dev/null +++ b/crates/pyo3-litchi/Cargo.toml @@ -0,0 +1,34 @@ +[package] +name = "litchi-py" +version = "0.0.1" +edition = "2024" + +[lib] +name = "litchi_py" +crate-type = ["cdylib"] + +[dependencies] + +# The main litchi library +litchi = { path = "../..", default-features = false, features = [ + "ole", + "ooxml", + "odf", + "iwa", + "rtf", + "formula", + "imgconv", +] } +# PyO3 for Python bindings +pyo3 = { version = "0.27", features = ["extension-module", "abi3-py38"] } + +[profile.release] +# Enable link-time optimization for smaller binary size +lto = true +# Optimize for speed (use "s" or "z" instead to optimize for size) +opt-level = 3 +# Strip symbols for smaller binary +strip = true +# Single codegen unit for better optimization +codegen-units = 1 + diff --git a/crates/pyo3-litchi/MANIFEST.in b/crates/pyo3-litchi/MANIFEST.in new file mode 100644 index 0000000..c7f5658 --- /dev/null +++ b/crates/pyo3-litchi/MANIFEST.in @@ -0,0 +1,6 @@ +# Include type stubs +include python/litchi_py/__init__.pyi +include python/litchi_py/py.typed + +# Include documentation +include README.md diff --git a/crates/pyo3-litchi/README.md b/crates/pyo3-litchi/README.md new file mode 100644 index 0000000..9c4794b --- /dev/null +++ b/crates/pyo3-litchi/README.md @@ -0,0 +1,270 @@ +# litchi-py - Python Bindings for Litchi + +High-performance Python bindings for the Litchi Office file format parser. Parse Word documents, PowerPoint presentations, and Excel workbooks with ease. 
+ +## Features + +- **Fast**: Built on Rust with zero-copy parsing and SIMD optimizations +- **Universal**: Supports legacy and modern Office formats (.doc/.docx, .ppt/.pptx, .xls/.xlsx) +- **Easy to Use**: Pythonic API inspired by python-docx and python-pptx +- **Type Safe**: Complete type stubs for excellent IDE support +- **Cross-Platform**: Works on Linux, macOS, and Windows +- **Python 3.8+**: Compatible with Python 3.8 and above using abi3 + +## Installation + +### From PyPI (when published) + +```bash +pip install litchi-py +``` + +### From Source + +```bash +# Install maturin +pip install maturin + +# Build and install in development mode +cd crates/pyo3-litchi +maturin develop --release + +# Or build a wheel +maturin build --release +pip install target/wheels/*.whl +``` + +## Quick Start + +### Reading Word Documents + +```python +from litchi_py import Document + +# Open any Word document (.doc or .docx) - format auto-detected +doc = Document.open("document.docx") + +# Extract all text +text = doc.text() +print(text) + +# Access paragraphs +for para in doc.paragraphs(): + print(f"Paragraph: {para.text()}") + + # Access runs with formatting + for run in para.runs(): + print(f" Text: {run.text()}") + if run.bold(): + print(" (bold)") + +# Access tables +for table in doc.tables(): + print(f"Table with {table.row_count()} rows") + for row in table.rows(): + for cell in row.cells(): + print(f" Cell: {cell.text()}") +``` + +### Reading PowerPoint Presentations + +```python +from litchi_py import Presentation + +# Open any PowerPoint presentation (.ppt or .pptx) +pres = Presentation.open("presentation.pptx") + +# Extract all text +text = pres.text() +print(text) + +# Get slide count +print(f"Total slides: {pres.slide_count()}") + +# Access individual slides +for i, slide in enumerate(pres.slides()): + print(f"Slide {i + 1}: {slide.text()}") +``` + +### Reading Excel Workbooks + +```python +from litchi_py import Workbook + +# Open an Excel workbook (.xls, .xlsx, .xlsb) 
+wb = Workbook.open("workbook.xlsx") + +# Get worksheet count +print(f"Worksheets: {wb.worksheet_count()}") + +# Access worksheets +for ws in wb.worksheets(): + print(f"Sheet: {ws.name()}") + print(f" Rows: {ws.row_count()}") + print(f" Cols: {ws.column_count()}") + + # Get cell value + value = ws.cell_value(0, 0) # Row 0, Column 0 + if value: + print(f" A1: {value}") + + # Get all rows + for row in ws.rows(): + print(row) + +# Get worksheet by name +sheet = wb.worksheet_by_name("Sheet1") +if sheet: + print(f"Found sheet: {sheet.name()}") +``` + +### Format Detection + +```python +from litchi_py import detect_file_format, detect_file_format_from_bytes, FileFormat + +# Detect format from file path +fmt = detect_file_format("document.docx") +print(fmt) # FileFormat.Docx + +# Detect format from bytes +with open("presentation.pptx", "rb") as f: + data = f.read() + fmt = detect_file_format_from_bytes(data) + print(fmt) # FileFormat.Pptx +``` + +## API Reference + +### Document API + +- **`Document.open(path)`**: Open a Word document +- **`Document.text()`**: Extract all text +- **`Document.paragraphs()`**: Get all paragraphs +- **`Document.tables()`**: Get all tables +- **`Paragraph.text()`**: Get paragraph text +- **`Paragraph.runs()`**: Get text runs +- **`Run.text()`**: Get run text +- **`Run.bold()`**: Check if text is bold +- **`Run.italic()`**: Check if text is italic +- **`Run.underline()`**: Check if text is underlined +- **`Table.row_count()`**: Get number of rows +- **`Table.rows()`**: Get all rows +- **`TableRow.cells()`**: Get all cells +- **`TableCell.text()`**: Get cell text + +### Presentation API + +- **`Presentation.open(path)`**: Open a PowerPoint presentation +- **`Presentation.text()`**: Extract all text +- **`Presentation.slide_count()`**: Get number of slides +- **`Presentation.slides()`**: Get all slides +- **`Slide.text()`**: Get slide text + +### Workbook API + +- **`Workbook.open(path)`**: Open an Excel workbook +- **`Workbook.worksheet_count()`**: Get number of worksheets 
+- **`Workbook.worksheets()`**: Get all worksheets +- **`Workbook.worksheet_by_name(name)`**: Get worksheet by name +- **`Worksheet.name()`**: Get worksheet name +- **`Worksheet.row_count()`**: Get number of rows +- **`Worksheet.column_count()`**: Get number of columns +- **`Worksheet.cell_value(row, col)`**: Get cell value +- **`Worksheet.rows()`**: Get all rows + +### Utility Functions + +- **`detect_file_format(path)`**: Detect file format from path +- **`detect_file_format_from_bytes(data)`**: Detect format from bytes + +## Supported Formats + +| Format | Extension | Read Support | +|--------|-----------|--------------| +| Microsoft Word 97-2003 | .doc | ✅ | +| Microsoft Word 2007+ | .docx | ✅ | +| Microsoft PowerPoint 97-2003 | .ppt | ✅ | +| Microsoft PowerPoint 2007+ | .pptx | ✅ | +| Microsoft Excel 97-2003 | .xls | ✅ | +| Microsoft Excel 2007+ | .xlsx | ✅ | +| Microsoft Excel Binary | .xlsb | ✅ | +| OpenDocument Text | .odt | ✅ | +| OpenDocument Spreadsheet | .ods | ✅ | +| OpenDocument Presentation | .odp | ✅ | +| Apple Pages | .pages | ✅ | +| Apple Keynote | .key | ✅ | +| Apple Numbers | .numbers | ✅ | +| Rich Text Format | .rtf | ✅ | + +## Performance + +Litchi is built on Rust and uses: +- Zero-copy parsing where possible +- SIMD instructions for text processing +- Efficient memory management +- Parallel processing for large files + +This results in performance that's often **10-100x faster** than pure Python implementations. 
+ +## Development + +### Building from Source + +```bash +# Install dependencies +pip install maturin + +# Build in development mode (faster, with debug symbols) +maturin develop + +# Build in release mode (optimized) +maturin develop --release + +# Run tests +pytest tests/ +``` + +### Project Structure + +``` +pyo3-litchi/ +├── src/ +│ ├── lib.rs # Main module entry point +│ ├── common.rs # Common types and utilities +│ ├── document.rs # Document API bindings +│ ├── presentation.rs # Presentation API bindings +│ └── sheet.rs # Workbook API bindings +├── python/ +│ └── litchi_py/ +│ ├── __init__.pyi # Type stubs +│ └── py.typed # PEP 561 marker +├── Cargo.toml # Rust dependencies +├── pyproject.toml # Python project config +└── README.md # This file +``` + +## Type Checking + +Full type stubs are included for excellent IDE support and type checking with mypy: + +```bash +pip install mypy +mypy your_script.py +``` + +## License + +This project is licensed under the Apache License, Version 2.0 ([LICENSE](../../LICENSE)). + +## Contributing + +Contributions are welcome! Please feel free to submit a Pull Request. 
+ +## Related Projects + +- [Litchi](https://github.com/DevExzh/litchi) - The main Rust library +- [python-docx](https://python-docx.readthedocs.io/) - Pure Python DOCX library +- [python-pptx](https://python-pptx.readthedocs.io/) - Pure Python PPTX library +- [openpyxl](https://openpyxl.readthedocs.io/) - Pure Python XLSX library + diff --git a/crates/pyo3-litchi/pyproject.toml b/crates/pyo3-litchi/pyproject.toml new file mode 100644 index 0000000..937640d --- /dev/null +++ b/crates/pyo3-litchi/pyproject.toml @@ -0,0 +1,40 @@ +[build-system] +requires = ["maturin>=1.0,<2.0"] +build-backend = "maturin" + +[project] +name = "litchi-py" +version = "0.0.1" +description = "Python bindings for Litchi - High-performance Office file format parser" +readme = "README.md" +authors = [ + { name = "Ryker Zhu" } +] +license = { text = "Apache-2.0" } +keywords = ["office", "docx", "xlsx", "pptx", "parser"] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Rust", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Software Development :: Libraries", + "Topic :: Text Processing", +] +requires-python = ">=3.8" + +[project.urls] +Homepage = "https://github.com/DevExzh/litchi" +Repository = "https://github.com/DevExzh/litchi" +Documentation = "https://docs.rs/litchi" + +[tool.maturin] +# Use abi3 for Python 3.8+ compatibility +# Include type stubs +include = ["python/litchi_py/__init__.pyi", "python/litchi_py/py.typed"] + diff --git a/crates/pyo3-litchi/python/litchi_py/__init__.pyi b/crates/pyo3-litchi/python/litchi_py/__init__.pyi new file mode 100644 index 0000000..7e3807b --- /dev/null +++ 
b/crates/pyo3-litchi/python/litchi_py/__init__.pyi @@ -0,0 +1,421 @@ +""" +Litchi - High-performance Office file format parser + +Type stubs for the litchi_py Python extension module. +""" + +from pathlib import Path +from typing import Optional, List +from enum import Enum + +class FileFormat(Enum): + """File format enumeration + + Represents the different Office file formats supported by Litchi. + """ + Doc: int # Microsoft Word 97-2003 (.doc) + Docx: int # Microsoft Word 2007+ (.docx) + Ppt: int # Microsoft PowerPoint 97-2003 (.ppt) + Pptx: int # Microsoft PowerPoint 2007+ (.pptx) + Xls: int # Microsoft Excel 97-2003 (.xls) + Xlsx: int # Microsoft Excel 2007+ (.xlsx) + Xlsb: int # Microsoft Excel Binary 2007+ (.xlsb) + Odt: int # OpenDocument Text (.odt) + Ods: int # OpenDocument Spreadsheet (.ods) + Odp: int # OpenDocument Presentation (.odp) + Pages: int # Apple Pages (.pages) + Keynote: int # Apple Keynote (.key) + Numbers: int # Apple Numbers (.numbers) + Rtf: int # Rich Text Format (.rtf) + +class RGBColor: + """RGB color representation + + Represents a color in RGB format with values from 0-255. + """ + + def __init__(self, r: int, g: int, b: int) -> None: + """Create a new RGB color + + Args: + r: Red component (0-255) + g: Green component (0-255) + b: Blue component (0-255) + """ + ... + + @property + def r(self) -> int: + """Red component (0-255)""" + ... + + @property + def g(self) -> int: + """Green component (0-255)""" + ... + + @property + def b(self) -> int: + """Blue component (0-255)""" + ... + +class Length: + """Length with units + + Represents a measurement with associated units (EMUs, points, inches, etc.). + """ + + @staticmethod + def from_emus(emus: int) -> Length: + """Create a length from EMUs (English Metric Units) + + Args: + emus: Length in EMUs (914400 EMUs = 1 inch) + """ + ... 
+ + @staticmethod + def from_points(points: float) -> Length: + """Create a length from points + + Args: + points: Length in points (72 points = 1 inch) + """ + ... + + @staticmethod + def from_inches(inches: float) -> Length: + """Create a length from inches + + Args: + inches: Length in inches + """ + ... + + def to_emus(self) -> int: + """Convert to EMUs""" + ... + + def to_points(self) -> float: + """Convert to points""" + ... + + def to_inches(self) -> float: + """Convert to inches""" + ... + +def detect_file_format(path: Path | str) -> Optional[FileFormat]: + """Detect file format from file path + + Args: + path: Path to the file + + Returns: + The detected FileFormat, or None if format cannot be determined + """ + ... + +def detect_file_format_from_bytes(data: bytes) -> Optional[FileFormat]: + """Detect file format from bytes + + Args: + data: File content as bytes + + Returns: + The detected FileFormat, or None if format cannot be determined + """ + ... + +# Document API + +class Run: + """A run of text with consistent formatting + + Represents a contiguous section of text that shares the same formatting properties. + """ + + def text(self) -> str: + """Extract text from the run""" + ... + + def bold(self) -> Optional[bool]: + """Check if the run is bold + + Returns: + True if bold, False if not bold, None if unspecified + """ + ... + + def italic(self) -> Optional[bool]: + """Check if the run is italic + + Returns: + True if italic, False if not italic, None if unspecified + """ + ... + + def underline(self) -> Optional[bool]: + """Check if the run is underlined + + Returns: + True if underlined, False if not underlined, None if unspecified + """ + ... + +class Paragraph: + """A paragraph in a document + + Represents a single paragraph with text and formatting. + """ + + def text(self) -> str: + """Extract text from the paragraph""" + ... 
+ + def runs(self) -> List[Run]: + """Get all runs in the paragraph + + A run is a contiguous section of text with the same formatting. + + Returns: + List of Run objects + """ + ... + +class TableCell: + """A cell in a table + + Represents a single cell containing text and possibly other content. + """ + + def text(self) -> str: + """Extract text from the cell""" + ... + +class TableRow: + """A row in a table + + Represents a single row containing cells. + """ + + def cells(self) -> List[TableCell]: + """Get all cells in the row""" + ... + +class Table: + """A table in a document + + Represents a table with rows and cells. + """ + + def row_count(self) -> int: + """Get the number of rows in the table""" + ... + + def rows(self) -> List[TableRow]: + """Get all rows in the table""" + ... + +class Document: + """Unified Word document interface + + Provides format-agnostic interface for both .doc and .docx files. + The format is automatically detected when opening a file. + + Example: + >>> from litchi_py import Document + >>> doc = Document.open("document.docx") + >>> text = doc.text() + >>> for para in doc.paragraphs(): + ... print(para.text()) + """ + + @staticmethod + def open(path: Path | str) -> Document: + """Open a Word document from a file path + + The file format (.doc or .docx) is automatically detected. + + Args: + path: Path to the document file + + Returns: + Document instance + + Raises: + IOError: If the file cannot be read + ValueError: If the file format is invalid or unsupported + """ + ... + + def text(self) -> str: + """Extract all text from the document + + Returns: + All text content as a single string + """ + ... + + def paragraphs(self) -> List[Paragraph]: + """Get all paragraphs in the document + + Returns: + List of Paragraph objects + """ + ... + + def tables(self) -> List[Table]: + """Get all tables in the document + + Returns: + List of Table objects + """ + ... 
+ +# Presentation API + +class Slide: + """A slide in a presentation + + Represents a single slide with text and shapes. + """ + + def text(self) -> str: + """Extract all text from the slide""" + ... + +class Presentation: + """Unified PowerPoint presentation interface + + Provides format-agnostic interface for both .ppt and .pptx files. + The format is automatically detected when opening a file. + + Example: + >>> from litchi_py import Presentation + >>> pres = Presentation.open("presentation.pptx") + >>> print(f"Slides: {pres.slide_count()}") + >>> for slide in pres.slides(): + ... print(slide.text()) + """ + + @staticmethod + def open(path: Path | str) -> Presentation: + """Open a PowerPoint presentation from a file path + + The file format (.ppt or .pptx) is automatically detected. + + Args: + path: Path to the presentation file + + Returns: + Presentation instance + + Raises: + IOError: If the file cannot be read + ValueError: If the file format is invalid or unsupported + """ + ... + + def text(self) -> str: + """Extract all text from the presentation + + Returns: + All text content from all slides as a single string + """ + ... + + def slide_count(self) -> int: + """Get the number of slides in the presentation""" + ... + + def slides(self) -> List[Slide]: + """Get all slides in the presentation + + Returns: + List of Slide objects + """ + ... + +# Sheet API + +class Worksheet: + """A worksheet in a workbook + + Note: This is a placeholder for future worksheet-level API. + Currently, use Workbook.worksheet_names() and Workbook.text() for data access. + """ + pass + +class Workbook: + """Excel workbook interface + + Provides support for Excel workbooks in various formats + (.xls, .xlsx, .xlsb, .ods, .numbers). + The format is automatically detected when opening a file. + + Example: + >>> from litchi_py import Workbook + >>> wb = Workbook.open("workbook.xlsx") + >>> print(f"Worksheets: {wb.worksheet_count()}") + >>> for name in wb.worksheet_names(): + ... 
print(f"Sheet: {name}") + >>> text = wb.text() + >>> print(text) + """ + + @staticmethod + def open(path: Path | str) -> Workbook: + """Open an Excel workbook from a file path + + The file format (.xls, .xlsx, .xlsb, .ods, .numbers) is automatically detected. + + Args: + path: Path to the workbook file + + Returns: + Workbook instance + + Raises: + IOError: If the file cannot be read + ValueError: If the file format is invalid or unsupported + """ + ... + + def worksheet_count(self) -> int: + """Get the number of worksheets in the workbook""" + ... + + def worksheet_names(self) -> List[str]: + """Get all worksheet names + + Returns: + List of worksheet names + """ + ... + + def text(self) -> str: + """Extract all text from all worksheets + + Returns: + All text content as a single string + """ + ... + +__all__ = [ + "FileFormat", + "RGBColor", + "Length", + "detect_file_format", + "detect_file_format_from_bytes", + "Document", + "Paragraph", + "Run", + "Table", + "TableRow", + "TableCell", + "Presentation", + "Slide", + "Workbook", + "Worksheet", +] + diff --git a/crates/pyo3-litchi/python/litchi_py/py.typed b/crates/pyo3-litchi/python/litchi_py/py.typed new file mode 100644 index 0000000..c079667 --- /dev/null +++ b/crates/pyo3-litchi/python/litchi_py/py.typed @@ -0,0 +1,2 @@ +# Marker file for PEP 561 + diff --git a/crates/pyo3-litchi/src/common.rs b/crates/pyo3-litchi/src/common.rs new file mode 100644 index 0000000..4d23234 --- /dev/null +++ b/crates/pyo3-litchi/src/common.rs @@ -0,0 +1,261 @@ +//! 
Common types and utilities + +use pyo3::exceptions::{PyException, PyIOError, PyValueError}; +use pyo3::prelude::*; +use pyo3::types::PyModule; +use std::path::PathBuf; + +/// Registers common types with the Python module +pub fn register(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_function(wrap_pyfunction!(detect_file_format, m)?)?; + m.add_function(wrap_pyfunction!(detect_file_format_from_bytes, m)?)?; + Ok(()) +} + +/// Converts a Rust litchi::Error to a Python exception +pub fn to_py_err(err: litchi::Error) -> PyErr { + match err { + litchi::Error::Io(e) => PyIOError::new_err(e.to_string()), + litchi::Error::InvalidFormat(msg) => PyValueError::new_err(msg), + litchi::Error::Unsupported(msg) => PyValueError::new_err(msg), + _ => PyException::new_err(err.to_string()), + } +} + +/// Converts a boxed error to a Python exception +pub fn boxed_err_to_py_err(err: Box) -> PyErr { + PyException::new_err(err.to_string()) +} + +/// File format enumeration +/// +/// Represents the different Office file formats supported by Litchi. 
+#[pyclass] +#[derive(Clone, Copy, Debug)] +pub enum FileFormat { + /// Microsoft Word 97-2003 (.doc) + Doc, + /// Microsoft Word 2007+ (.docx) + Docx, + /// Microsoft PowerPoint 97-2003 (.ppt) + Ppt, + /// Microsoft PowerPoint 2007+ (.pptx) + Pptx, + /// Microsoft Excel 97-2003 (.xls) + Xls, + /// Microsoft Excel 2007+ (.xlsx) + Xlsx, + /// Microsoft Excel Binary 2007+ (.xlsb) + Xlsb, + /// OpenDocument Text (.odt) + Odt, + /// OpenDocument Spreadsheet (.ods) + Ods, + /// OpenDocument Presentation (.odp) + Odp, + /// Apple Pages (.pages) + Pages, + /// Apple Keynote (.key) + Keynote, + /// Apple Numbers (.numbers) + Numbers, + /// Rich Text Format (.rtf) + Rtf, +} + +impl From for FileFormat { + fn from(fmt: litchi::FileFormat) -> Self { + match fmt { + litchi::FileFormat::Doc => FileFormat::Doc, + litchi::FileFormat::Docx => FileFormat::Docx, + litchi::FileFormat::Ppt => FileFormat::Ppt, + litchi::FileFormat::Pptx => FileFormat::Pptx, + litchi::FileFormat::Xls => FileFormat::Xls, + litchi::FileFormat::Xlsx => FileFormat::Xlsx, + litchi::FileFormat::Xlsb => FileFormat::Xlsb, + litchi::FileFormat::Odt => FileFormat::Odt, + litchi::FileFormat::Ods => FileFormat::Ods, + litchi::FileFormat::Odp => FileFormat::Odp, + litchi::FileFormat::Pages => FileFormat::Pages, + litchi::FileFormat::Keynote => FileFormat::Keynote, + litchi::FileFormat::Numbers => FileFormat::Numbers, + litchi::FileFormat::Rtf => FileFormat::Rtf, + } + } +} + +#[pymethods] +impl FileFormat { + /// Returns the string representation of the format + fn __str__(&self) -> &'static str { + match self { + FileFormat::Doc => "Doc", + FileFormat::Docx => "Docx", + FileFormat::Ppt => "Ppt", + FileFormat::Pptx => "Pptx", + FileFormat::Xls => "Xls", + FileFormat::Xlsx => "Xlsx", + FileFormat::Xlsb => "Xlsb", + FileFormat::Odt => "Odt", + FileFormat::Ods => "Ods", + FileFormat::Odp => "Odp", + FileFormat::Pages => "Pages", + FileFormat::Keynote => "Keynote", + FileFormat::Numbers => "Numbers", + FileFormat::Rtf 
=> "Rtf", + } + } + + fn __repr__(&self) -> String { + format!("FileFormat.{}", self.__str__()) + } +} + +/// RGB color representation +/// +/// Represents a color in RGB format with values from 0-255. +#[pyclass] +#[derive(Clone, Debug)] +pub struct RGBColor { + inner: litchi::RGBColor, +} + +#[pymethods] +impl RGBColor { + /// Create a new RGB color + /// + /// Args: + /// r: Red component (0-255) + /// g: Green component (0-255) + /// b: Blue component (0-255) + #[new] + fn new(r: u8, g: u8, b: u8) -> Self { + RGBColor { + inner: litchi::RGBColor::new(r, g, b), + } + } + + /// Red component (0-255) + #[getter] + fn r(&self) -> u8 { + self.inner.r + } + + /// Green component (0-255) + #[getter] + fn g(&self) -> u8 { + self.inner.g + } + + /// Blue component (0-255) + #[getter] + fn b(&self) -> u8 { + self.inner.b + } + + fn __str__(&self) -> String { + format!("RGB({}, {}, {})", self.r(), self.g(), self.b()) + } + + fn __repr__(&self) -> String { + format!("RGBColor({}, {}, {})", self.r(), self.g(), self.b()) + } +} + +/// Length with units +/// +/// Represents a measurement with associated units (EMUs, points, inches, etc.). 
+#[pyclass] +#[derive(Clone, Debug)] +pub struct Length { + inner: litchi::Length, +} + +#[pymethods] +impl Length { + /// Create a length from EMUs (English Metric Units) + /// + /// Args: + /// emus: Length in EMUs (914400 EMUs = 1 inch) + #[staticmethod] + fn from_emus(emus: i64) -> Self { + Length { + inner: litchi::Length::from_emus(emus), + } + } + + /// Create a length from points + /// + /// Args: + /// points: Length in points (72 points = 1 inch) + #[staticmethod] + fn from_points(points: f64) -> Self { + Length { + inner: litchi::Length::from_inches(points / 72.0), + } + } + + /// Create a length from inches + /// + /// Args: + /// inches: Length in inches + #[staticmethod] + fn from_inches(inches: f64) -> Self { + Length { + inner: litchi::Length::from_inches(inches), + } + } + + /// Convert to EMUs + fn to_emus(&self) -> i64 { + self.inner.emus() + } + + /// Convert to points + fn to_points(&self) -> f64 { + self.inner.points() + } + + /// Convert to inches + fn to_inches(&self) -> f64 { + self.inner.inches() + } + + fn __str__(&self) -> String { + format!("{:.2} pt", self.to_points()) + } + + fn __repr__(&self) -> String { + format!("Length({} EMUs)", self.to_emus()) + } +} + +/// Detect file format from file path +/// +/// Args: +/// path: Path to the file +/// +/// Returns: +/// The detected FileFormat, or None if format cannot be determined +#[pyfunction] +fn detect_file_format(path: PathBuf) -> PyResult> { + match litchi::detect_file_format(&path) { + Some(fmt) => Ok(Some(FileFormat::from(fmt))), + None => Ok(None), + } +} + +/// Detect file format from bytes +/// +/// Args: +/// data: File content as bytes +/// +/// Returns: +/// The detected FileFormat, or None if format cannot be determined +#[pyfunction] +fn detect_file_format_from_bytes(data: &[u8]) -> Option { + litchi::detect_file_format_from_bytes(data).map(FileFormat::from) +} diff --git a/crates/pyo3-litchi/src/document.rs b/crates/pyo3-litchi/src/document.rs new file mode 100644 index 
0000000..0570395 --- /dev/null +++ b/crates/pyo3-litchi/src/document.rs @@ -0,0 +1,299 @@ +//! Document API bindings + +use pyo3::prelude::*; +use pyo3::types::PyModule; +use std::path::PathBuf; +use std::sync::Arc; + +use crate::common::to_py_err; + +/// Registers document types with the Python module +pub fn register(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + Ok(()) +} + +/// Unified Word document interface +/// +/// Provides format-agnostic interface for both .doc and .docx files. +/// The format is automatically detected when opening a file. +/// +/// # Examples +/// +/// ```python +/// from litchi_py import Document +/// +/// # Open any Word document (.doc or .docx) +/// doc = Document.open("document.docx") +/// +/// # Extract all text +/// text = doc.text() +/// print(text) +/// +/// # Access paragraphs +/// for para in doc.paragraphs(): +/// print(f"Paragraph: {para.text()}") +/// ``` +#[pyclass(unsendable)] +pub struct Document { + inner: Arc, +} + +#[pymethods] +impl Document { + /// Open a Word document from a file path + /// + /// The file format (.doc or .docx) is automatically detected. 
+ /// + /// Args: + /// path: Path to the document file + /// + /// Returns: + /// Document instance + /// + /// Raises: + /// IOError: If the file cannot be read + /// ValueError: If the file format is invalid or unsupported + #[staticmethod] + #[allow(clippy::arc_with_non_send_sync)] // reason: PyO3 manages thread access via the GIL; Arc here is intra-thread refcounting only + fn open(path: PathBuf) -> PyResult { + let doc = litchi::Document::open(path).map_err(to_py_err)?; + Ok(Document { + inner: Arc::new(doc), + }) + } + + /// Extract all text from the document + /// + /// Returns: + /// All text content as a single string + fn text(&self) -> PyResult { + self.inner.text().map_err(to_py_err) + } + + /// Get all paragraphs in the document + /// + /// Returns: + /// List of Paragraph objects + fn paragraphs(&self) -> PyResult> { + let paras = self.inner.paragraphs().map_err(to_py_err)?; + Ok(paras + .into_iter() + .map(|p| Paragraph { inner: Arc::new(p) }) + .collect()) + } + + /// Get all tables in the document + /// + /// Returns: + /// List of Table objects + fn tables(&self) -> PyResult> { + let tables = self.inner.tables().map_err(to_py_err)?; + Ok(tables + .into_iter() + .map(|t| Table { inner: Arc::new(t) }) + .collect()) + } + + fn __repr__(&self) -> String { + "".to_string() + } +} + +/// A paragraph in a document +/// +/// Represents a single paragraph with text and formatting. +#[pyclass(unsendable)] +pub struct Paragraph { + inner: Arc, +} + +#[pymethods] +impl Paragraph { + /// Extract text from the paragraph + /// + /// Returns: + /// Paragraph text as a string + fn text(&self) -> PyResult { + self.inner.text().map_err(to_py_err) + } + + /// Get all runs in the paragraph + /// + /// A run is a contiguous section of text with the same formatting. 
+ /// + /// Returns: + /// List of Run objects + fn runs(&self) -> PyResult> { + let runs = self.inner.runs().map_err(to_py_err)?; + Ok(runs + .into_iter() + .map(|r| Run { inner: Arc::new(r) }) + .collect()) + } + + fn __repr__(&self) -> PyResult { + let text = self.text().unwrap_or_default(); + let preview = if let Some((end, _)) = text.char_indices().nth(50) { // cut on a char boundary: slicing at byte 50 panics inside a multi-byte UTF-8 char + format!("{}...", &text[..end]) + } else { + text + }; + Ok(format!("", preview)) + } +} + +/// A run of text with consistent formatting +/// +/// Represents a contiguous section of text that shares the same formatting properties. +#[pyclass(unsendable)] +pub struct Run { + inner: Arc, +} + +#[pymethods] +impl Run { + /// Extract text from the run + /// + /// Returns: + /// Run text as a string + fn text(&self) -> PyResult { + self.inner.text().map_err(to_py_err) + } + + /// Check if the run is bold + /// + /// Returns: + /// True if bold, False if not bold, None if unspecified + fn bold(&self) -> PyResult> { + self.inner.bold().map_err(to_py_err) + } + + /// Check if the run is italic + /// + /// Returns: + /// True if italic, False if not italic, None if unspecified + fn italic(&self) -> PyResult> { + self.inner.italic().map_err(to_py_err) + } + + /// Check if the run is underlined + /// + /// Returns: + /// True if underlined, False if not underlined, None if unspecified + /// + /// Note: This method may not be supported for all document formats. + fn underline(&self) -> PyResult> { + // The underline() method may not exist on all Run types + // For now, return None as a placeholder + Ok(None) + } + + fn __repr__(&self) -> PyResult { + let text = self.text().unwrap_or_default(); + let preview = if let Some((end, _)) = text.char_indices().nth(30) { // cut on a char boundary: slicing at byte 30 panics inside a multi-byte UTF-8 char + format!("{}...", &text[..end]) + } else { + text + }; + Ok(format!("", preview)) + } +} + +/// A table in a document +/// +/// Represents a table with rows and cells.
+#[pyclass(unsendable)] +pub struct Table { + inner: Arc, +} + +#[pymethods] +impl Table { + /// Get the number of rows in the table + /// + /// Returns: + /// Number of rows + fn row_count(&self) -> PyResult { + self.inner.row_count().map_err(to_py_err) + } + + /// Get all rows in the table + /// + /// Returns: + /// List of TableRow objects + fn rows(&self) -> PyResult> { + let rows = self.inner.rows().map_err(to_py_err)?; + Ok(rows + .into_iter() + .map(|r| TableRow { inner: Arc::new(r) }) + .collect()) + } + + fn __repr__(&self) -> PyResult { + let row_count = self.row_count().unwrap_or(0); + Ok(format!("", row_count)) + } +} + +/// A row in a table +/// +/// Represents a single row containing cells. +#[pyclass(unsendable)] +pub struct TableRow { + inner: Arc, +} + +#[pymethods] +impl TableRow { + /// Get all cells in the row + /// + /// Returns: + /// List of TableCell objects + fn cells(&self) -> PyResult> { + let cells = self.inner.cells().map_err(to_py_err)?; + Ok(cells + .into_iter() + .map(|c| TableCell { inner: Arc::new(c) }) + .collect()) + } + + fn __repr__(&self) -> PyResult { + let cell_count = self.cells().map(|c| c.len()).unwrap_or(0); + Ok(format!("", cell_count)) + } +} + +/// A cell in a table +/// +/// Represents a single cell containing text and possibly other content. +#[pyclass(unsendable)] +pub struct TableCell { + inner: Arc, +} + +#[pymethods] +impl TableCell { + /// Extract text from the cell + /// + /// Returns: + /// Cell text as a string + fn text(&self) -> PyResult { + self.inner.text().map_err(to_py_err) + } + + fn __repr__(&self) -> PyResult { + let text = self.text().unwrap_or_default(); + let preview = if let Some((end, _)) = text.char_indices().nth(30) { // cut on a char boundary: slicing at byte 30 panics inside a multi-byte UTF-8 char + format!("{}...", &text[..end]) + } else { + text + }; + Ok(format!("", preview)) + } +} diff --git a/crates/pyo3-litchi/src/lib.rs b/crates/pyo3-litchi/src/lib.rs new file mode 100644 index 0000000..e0cac46 --- /dev/null +++ b/crates/pyo3-litchi/src/lib.rs @@ -0,0 +1,83 @@ +//!
Python bindings for Litchi - High-performance Office file format parser +//! +//! This module provides Python bindings for the Litchi Rust library using PyO3. + +use pyo3::prelude::*; +use pyo3::types::PyModule; + +mod common; +mod document; +mod presentation; +mod sheet; + +/// Litchi - High-performance Office file format parser +/// +/// This module provides Python bindings for parsing and manipulating various +/// Office file formats including: +/// - Word documents (.doc, .docx) +/// - PowerPoint presentations (.ppt, .pptx) +/// - Excel workbooks (.xls, .xlsx, .xlsb) +/// - OpenDocument formats (.odt, .ods, .odp) +/// - Apple iWork formats (.pages, .key, .numbers) +/// - RTF documents (.rtf) +/// +/// # Examples +/// +/// ## Reading Word Documents +/// +/// ```python +/// from litchi_py import Document +/// +/// # Open a document (auto-detects format) +/// doc = Document.open("document.docx") +/// +/// # Extract text +/// text = doc.text() +/// print(text) +/// +/// # Access paragraphs +/// for para in doc.paragraphs(): +/// print(f"Paragraph: {para.text()}") +/// +/// # Access runs with formatting +/// for run in para.runs(): +/// print(f" Text: {run.text()}") +/// if run.bold(): +/// print(" (bold)") +/// ``` +/// +/// ## Reading PowerPoint Presentations +/// +/// ```python +/// from litchi_py import Presentation +/// +/// # Open a presentation +/// pres = Presentation.open("presentation.pptx") +/// +/// # Extract text +/// text = pres.text() +/// print(text) +/// +/// # Get slide count +/// print(f"Total slides: {pres.slide_count()}") +/// +/// # Access individual slides +/// for i, slide in enumerate(pres.slides()): +/// print(f"Slide {i + 1}: {slide.text()}") +/// ``` +#[pymodule] +fn litchi_py(m: &Bound<'_, PyModule>) -> PyResult<()> { + // Register common types + common::register(m)?; + + // Register document types + document::register(m)?; + + // Register presentation types + presentation::register(m)?; + + // Register sheet types + sheet::register(m)?; 
+ + Ok(()) +} diff --git a/crates/pyo3-litchi/src/presentation.rs b/crates/pyo3-litchi/src/presentation.rs new file mode 100644 index 0000000..d1f556a --- /dev/null +++ b/crates/pyo3-litchi/src/presentation.rs @@ -0,0 +1,131 @@ +//! Presentation API bindings + +use pyo3::prelude::*; +use pyo3::types::PyModule; +use std::path::PathBuf; +use std::sync::Arc; + +use crate::common::to_py_err; + +/// Registers presentation types with the Python module +pub fn register(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + m.add_class::()?; + Ok(()) +} + +/// Unified PowerPoint presentation interface +/// +/// Provides format-agnostic interface for both .ppt and .pptx files. +/// The format is automatically detected when opening a file. +/// +/// # Examples +/// +/// ```python +/// from litchi_py import Presentation +/// +/// # Open any PowerPoint presentation (.ppt or .pptx) +/// pres = Presentation.open("presentation.pptx") +/// +/// # Extract all text +/// text = pres.text() +/// print(text) +/// +/// # Get slide count +/// print(f"Total slides: {pres.slide_count()}") +/// +/// # Access individual slides +/// for i, slide in enumerate(pres.slides()): +/// print(f"Slide {i + 1}: {slide.text()}") +/// ``` +#[pyclass(unsendable)] +pub struct Presentation { + inner: Arc, +} + +#[pymethods] +impl Presentation { + /// Open a PowerPoint presentation from a file path + /// + /// The file format (.ppt or .pptx) is automatically detected. 
+ /// + /// Args: + /// path: Path to the presentation file + /// + /// Returns: + /// Presentation instance + /// + /// Raises: + /// IOError: If the file cannot be read + /// ValueError: If the file format is invalid or unsupported + #[staticmethod] + #[allow(clippy::arc_with_non_send_sync)] // reason: PyO3 manages thread access via the GIL; Arc here is intra-thread refcounting only + fn open(path: PathBuf) -> PyResult { + let pres = litchi::Presentation::open(path).map_err(to_py_err)?; + Ok(Presentation { + inner: Arc::new(pres), + }) + } + + /// Extract all text from the presentation + /// + /// Returns: + /// All text content from all slides as a single string + fn text(&self) -> PyResult { + self.inner.text().map_err(to_py_err) + } + + /// Get the number of slides in the presentation + /// + /// Returns: + /// Number of slides + fn slide_count(&self) -> PyResult { + self.inner.slide_count().map_err(to_py_err) + } + + /// Get all slides in the presentation + /// + /// Returns: + /// List of Slide objects + fn slides(&self) -> PyResult> { + let slides = self.inner.slides().map_err(to_py_err)?; + Ok(slides + .into_iter() + .map(|s| Slide { inner: Arc::new(s) }) + .collect()) + } + + fn __repr__(&self) -> PyResult { + let slide_count = self.slide_count().unwrap_or(0); + Ok(format!("", slide_count)) + } +} + +/// A slide in a presentation +/// +/// Represents a single slide with text and shapes. 
+#[pyclass(unsendable)] +pub struct Slide { + inner: Arc, +} + +#[pymethods] +impl Slide { + /// Extract all text from the slide + /// + /// Returns: + /// All text content as a single string + fn text(&self) -> PyResult { + self.inner.text().map_err(to_py_err) + } + + fn __repr__(&self) -> PyResult { + let text = self.text().unwrap_or_default(); + let preview = if let Some((end, _)) = text.char_indices().nth(50) { // cut on a char boundary: slicing at byte 50 panics inside a multi-byte UTF-8 char + format!("{}...", &text[..end]) + } else { + text + }; + Ok(format!("", preview)) + } +} diff --git a/crates/pyo3-litchi/src/sheet.rs b/crates/pyo3-litchi/src/sheet.rs new file mode 100644 index 0000000..13fa558 --- /dev/null +++ b/crates/pyo3-litchi/src/sheet.rs @@ -0,0 +1,110 @@ +//! Sheet/Workbook API bindings + +use pyo3::prelude::*; +use pyo3::types::PyModule; +use std::path::PathBuf; +use std::sync::Arc; + +use crate::common::boxed_err_to_py_err; + +/// Registers sheet types with the Python module +pub fn register(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + m.add_class::()?; + Ok(()) +} + +/// Excel workbook interface +/// +/// Provides support for Excel workbooks in various formats (.xls, .xlsx, .xlsb). +/// The format is automatically detected when opening a file. +/// +/// # Examples +/// +/// ```python +/// from litchi_py import Workbook +/// +/// # Open an Excel workbook +/// wb = Workbook.open("workbook.xlsx") +/// +/// # Get worksheet count +/// print(f"Worksheets: {wb.worksheet_count()}") +/// +/// # Access worksheet names +/// for name in wb.worksheet_names(): +/// print(f"Sheet: {name}") +/// ``` +#[pyclass(unsendable)] +pub struct Workbook { + inner: Arc, +} + +#[pymethods] +impl Workbook { + /// Open an Excel workbook from a file path + /// + /// The file format (.xls, .xlsx, .xlsb, .ods, .numbers) is automatically detected.
+ /// + /// Args: + /// path: Path to the workbook file + /// + /// Returns: + /// Workbook instance + /// + /// Raises: + /// IOError: If the file cannot be read + /// ValueError: If the file format is invalid or unsupported + #[staticmethod] + #[allow(clippy::arc_with_non_send_sync)] // reason: PyO3 manages thread access via the GIL; Arc here is intra-thread refcounting only + fn open(path: PathBuf) -> PyResult { + let wb = litchi::sheet::Workbook::open(path).map_err(boxed_err_to_py_err)?; + Ok(Workbook { + inner: Arc::new(wb), + }) + } + + /// Get the number of worksheets in the workbook + /// + /// Returns: + /// Number of worksheets + fn worksheet_count(&self) -> PyResult { + self.inner.worksheet_count().map_err(boxed_err_to_py_err) + } + + /// Get all worksheet names + /// + /// Returns: + /// List of worksheet names + fn worksheet_names(&self) -> PyResult> { + self.inner.worksheet_names().map_err(boxed_err_to_py_err) + } + + /// Extract all text from all worksheets + /// + /// Returns: + /// All text content as a single string + fn text(&self) -> PyResult { + self.inner.text().map_err(boxed_err_to_py_err) + } + + fn __repr__(&self) -> PyResult { + let count = self.worksheet_count().unwrap_or(0); + Ok(format!("", count)) + } +} + +/// A worksheet in a workbook +/// +/// Note: This is a placeholder for future worksheet-level API. +/// Currently, use Workbook.worksheet_names() and Workbook.text() for data access. 
+#[pyclass] +pub struct Worksheet { + _private: (), +} + +#[pymethods] +impl Worksheet { + fn __repr__(&self) -> String { + "".to_string() + } +} diff --git a/crates/soapberry-zip/Cargo.toml b/crates/soapberry-zip/Cargo.toml new file mode 100644 index 0000000..104e5db --- /dev/null +++ b/crates/soapberry-zip/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "soapberry-zip" +version = "0.0.1" +authors = ["Ryker Zhu "] +edition = "2021" +description = "High-performance ZIP reader for Office document formats (OOXML, ODF, iWork)" +license = "MIT" +keywords = ["zip", "archive", "office"] +categories = ["filesystem", "compression", "parser-implementations"] +rust-version = "1.70" + +[dependencies] +# Hardware-accelerated CRC32 (uses SIMD/PCLMULQDQ when available) +crc32fast = "1" +# Deflate compression - used by all Office formats +flate2 = { version = "1", features = ["zlib-rs"], default-features = false } +# Parallel processing for multi-threaded decompression +rayon = "1" + +[dev-dependencies] +jiff = "0.2" +paste = "1.0" +quickcheck = "1.0.3" +quickcheck_macros = "1.1.0" +rstest = "0.26" diff --git a/crates/soapberry-zip/README.md b/crates/soapberry-zip/README.md new file mode 100644 index 0000000..f47d089 --- /dev/null +++ b/crates/soapberry-zip/README.md @@ -0,0 +1,5 @@ +# Soapberry Zip + +This is a forked version of [nickbabcock/rawzip](https://github.com/nickbabcock/rawzip) that mainly focuses on Office document formats (OOXML, ODF, iWork). We have removed the unneeded features and focused on performance and simplicity. + +Thanks for the original work! The original crate is licensed under MIT and you can obtain it from [here](https://github.com/nickbabcock/rawzip/blob/master/LICENSE.txt). 
\ No newline at end of file diff --git a/crates/soapberry-zip/assets/crc32-not-streamed.zip b/crates/soapberry-zip/assets/crc32-not-streamed.zip new file mode 100644 index 0000000000000000000000000000000000000000..f268d88732f837723525285c0922231d9c3fcb46 GIT binary patch literal 314 zcmWIWW@h1H0D;u@42Kn|Ms+MeHVCsb$S|bk=j)YJl!S(GGBDo@jr0fM(h6<{MwYLP zKvg0@Wk4ld0dPaofQG!>yod$akfg*SxFHXK27oY{AwVTSLl~Llm~pv90%#Qj1JF{2 wC5<2!+-0l~m!TPmY#64SkPUMM8U}YE&@e2n3-D%T1KG(0gtLHj7l^|E0B`I%6#xJL literal 0 HcmV?d00001 diff --git a/crates/soapberry-zip/assets/filename_mismatch_test.zip b/crates/soapberry-zip/assets/filename_mismatch_test.zip new file mode 100644 index 0000000000000000000000000000000000000000..0e883104b8f1042387c874154ca8b3b12d0507ba GIT binary patch literal 232 zcmWIWW@Zs#U|`^2cx=WK?Y7xwe;$z60>r#PT%4Ge8lRS#ld4xzQDS<=knfNIkK04r z2$3qOK%=q?e^TFD5=VOgu w*aEy6ndF$UJB=IYG#FUI2x8?X=9DKErRt?tqy~7ivVnvcfzT32&jfK80JTp^mjD0& literal 0 HcmV?d00001 diff --git a/crates/soapberry-zip/assets/go-with-datadesc-sig.zip b/crates/soapberry-zip/assets/go-with-datadesc-sig.zip new file mode 100644 index 0000000000000000000000000000000000000000..bcfe121bb63c79be6849ca64589feea612015512 GIT binary patch literal 242 zcmWIWW@Zs#;9$T6>_DNk{CvHViV`5j72wUzu_D!|js++T!U1?SCM6buGy*BG#+REH zu^==uGKnxC>_j#cWG55r00004XF*Lt007q5 z)K6G40000PbVXQnQ*UN;cVTj606}DLVr3vnZDD6+Qe|Oed2z{QJOBU!qe(a@st@4BKo_oJPzw>?HIhVm0L#Ej6 zcCguOm(QI#dLysUSQM1xW#7b?WhI9coK{!icDucq@rulkmX=oZy1mWeXsEqpG#cQz z^?utRB)7Nn2_^KAYm5|#yEXazMjH4t8k!0_NeXY&iE#p@M7{5k^WvsIwt5X66u zgMz_u`%zcjR>QW&SS(65Ye^9(fMuCu1-f+|cW+i$cI({q=d?f-4KkbL8H||JduEK zWEsv^R+I=c^Vc^Q6f|YEz%UF)-b=4uIMaXYiY0$xaS0d}!L(ln-P#;bsW@OrfJ(`M zqjL}(Z9R?iQmBps*cDKe=-Au$LmfdmOt7Tycmj$y<|dN@QaA~tlBZF^f-Lp@sesL# z=v+{~^TgRR$*YtpDuIysfOkp+VMc;zEC#y#TrZp+NA%AD{$()=e}R z3@I8(`tF+qL{{dnWNKppLokAoTC?UfF*Tt`)9K%C@(KhS0IXFLXxD~r|ta@O^5g7&dyB3xG)uXH!$3b5r40` zx_Z)TwPycJf*@p8yGZo8%QIme`{s}EJpZ7(yZbY-h4gwoy>wZv{T5&Vs0a;<1LtDu P00000NkvXXu0mjff_q_+ literal 0 HcmV?d00001 diff --git 
a/crates/soapberry-zip/assets/omni-mini.ja b/crates/soapberry-zip/assets/omni-mini.ja new file mode 100644 index 0000000000000000000000000000000000000000..1c853fdc7cc2c031f5c0eb8e6f8d73bdd87cb603 GIT binary patch literal 1131 zcmWG#VPFXGW@M6M7GYpu-~eJpn`hsuT5d8kF#Kg_VBiLdfB+*9FKG-0l0~V-`o(4G zddbDb0p6@^5J^S`OCSSi77zq@Gqd0}3`u)y===QJW&(fL?SB-O;PGbGwAXc>PxW31 zBphyF(VmcKZ}5g$HBczMOT^<;-{mj$_mxw3PusiJ;X%Nj$B*ys-F>{oE9caUgHx0i z@4mi1{7L1xZJWOMx}6sFjOBVOX(pS!c=EI?SJu-zC$k!dK5c1Fcks;Uvj4SoVxOFE zKDT4Jg_FI+ZJ}>5^Z)B|`QMw6&r+Ef?Y^u_p8r{-*3p;A^R**cW>iOVygkxmRvdLs zy!h8jCBN2}Wyg)>-)Ekd&3wFa%DL#7-{YoinYypBxrA}qBrb{i8S}W-oqRIS`_JX3 z9m|jX*7Lqxwxc>PXD*ZR(cG7FmSs+=xv)oc-QyEWw!f-+zyEID^V{1m*6xhZ3EI9= zeRc5t?oS!49z=*jehI^Qp)Z29r+!Hk7U*Jqj@^KaOdUA#E0GpDlGJ1Xz`p>vb^ss$FL zd#0|Lu=QW%1GSh$vqNF0a*VT5IDaX>X%Cw+NjSV~hu%x8V~Vn?udd5h`xvvwaGl1K z*wh5?>W@z%e?-ZCpXvDM()(lI4!?WAZgw`R=A*;skN-}5zi%P`Jn4163?oj) z9$x!&_j*fNtAB@H7q7UKYFK%lGjFHf5eKze<=f{X&Tr*k?%=L#dcykl`-(`f*m$X; zWbZ_N&biUsd%rXtJ;rtY=CsxSQ_pjWvrnD6>_xQCuLDmkdS9L{IsYnrQm1M|`83y> z5~bV98zb~;uie>cbo3XG^4v@LOwcwd(~njWFd7RTwph`n5%GB_=#~Z zvrJ0rw)p-&zZcW^iV6hHb-&H*t>jpFoO$mwR+pJiH%*azvR5$d*rS|=93A1%)}x)O z4u3gat2<-A-d)XStNg}o&oAvqy;koMr1#z3f5h+v|Kk^OW+7?~B3@-YT~~UPnzR<^ zEnYiq_t%Wa8cfSSHeF%gCpOWvr&L|$QrUy`2dtO5Wa#WO6q0S5b=89H);@iK4U-M- z7k2jvQ9%8<|BJ2W3hR5~th08CrRCs>cx6J(Xc5`#{(?3!~E{QpwZguji z*elek*7}M`F)QzfX;!$jtq$8}+ZC?;pEfF95`Oz!^13zK_0ke4wKr_6I{5E6;Z zlp!*wB=bHW@8`4k{%7xhe&64*kL`HhLusw$dG7nZuIoI{>%5-3%8KW3dF|^oG)&ux|JT%V{*t1h*-s;T=CrzV$ zq{0l_-=7WTPL8>c|MAJ$B260*MUs6XBjlKJMc$7h+oUMV`?KXZf3i9?*hVfm|E%No ztw+8b5zFmy^Yn`Bon8Dm*(2ukQQvgpLuBhO`XI3(_le@kUvusg*Lr3nf6eV?m)(J@ zprKhh;wrZK2XChbB;xwK6(9aDe=p%9lV5#(@X7rCy?bt7Z*G)a7!$VJnAS4)YiQqO zNa%*G%-oY2f|af_mUL&eJkyq@zP*-vkZ;*CEw{y{BabJw=K`nN()^6u%*e6p-#_G< zHH0aOx3{-9NG@1E;#63>Zk^AqTd5jpnkQp6nkOx2}-_1`&Y@UeN=w#^R z8+a|vR8>*z&f$1ON>j8_MQl2LkECU}if!tS?a)_KRz7Uel(2QLNXT$fqM}6u zKD?^FDUMm@jn7V}X=V(xGTs|(N|e8|ZC~xjkD8l4JU$ZdwX(dyCziTa)L|rK`lPtH 
z`@)}bjZ}4>*oIqH&y`}9XIc$)`R@k=WN2pSsHz6fPgLVhK12wcy}I>J;@_tjN}{a2)TTQrFKpczP~>ia$45u`toJwf=;Pii)GtnXu^Sp63dC2bH9x zDpwYVq-tX>X6mUJgnx|iAg;0qukLZ1l$Z$~ZOx)*6?Y!`7&RSZ_A#>f>Nm=GN4`9_ zcV6DnL5I!)8|8N7;?!&U5wSb3tEw0eX&rW+`Z_gr)upDurt@78o7eK9%UnZY zkE_+<-=8xhEx#z~Cb~b^_O796yrnB6|9Tz0_}BhlEB1DS!SCM%>fA`<1p|BSZU{rX;C8{8nZQf64` zsU)?OXSHxZw(0%DgAI`<;ty%RxFTEXI6lyN+^*-oriv|VXQ|U<`HHYj$5HYdyt)~u zmRBQn{l<;QZ8~mX+jw|*GJVxU;!^ccF?OS3i4joFj`276ybA?=fPT{BZ zd>1pvJ|`*R@6r^?fddDg9JljaS(?q#Fa2UI(& zRa1PjuW@(F&;IJr337~{si`Rk!eB&#OYE=*Qt8Q4z`)HK~~Y;A3w zn8t}TsSBI=@tKR0lXL(6XZC}&0eg?L9=GknEnDjrwKYiC5BN3h|Z@%a6KhjnFtMBc-agoA56W1oXP7Ho# zbDug*KBKDIM5WtleEIUab?f#XHE*o+S~2vTduuB}OU~3Q7IK>C`ZL~nF-4Vwn>$%M z*AyFL^(CDrwM;FlE3?(WO~GZnzyHzW$D*E#&K!GpQM#OZJ#a=c)Kh|08Q8oQb6R8~ zEl)RyJP_N+HeRn|oy9lOmfUDB?o=$-J?22;y-D$w8u!XrLN(z4Lh-NVDsYsn#P zoJG>T$a87_tj1rP!tOsQMwy&eYV_|X8J-qaehBJ=urp=pkUGKfR z^!T`4v`wk|;?L@vH*Y#hC0?!$)QoCuSG2uIWQBkr$$4`X4qRATro;@dnQmk5)`96) z)lsJT;|^!{?d32F91^&gB38bQM)})AQi;60sXcaoW~CI#GrX)TpW0|GGCTY!K3Q#z zHlk`!?LY%3b^5Hl$w36ZTFjU89XqO+k69)^+M561!2@BlkLGvZL|KNjTfFhx9>}Nc zQ4$#ufpeiyJt%f#_}TI>mOVQ;`1I-1wN`1=#+2mrk5h>8q{Sbel*dn>rb$}#6kWS4 zdnTEK+?8t<6cZEEPjAy*l3&8cxKKRe9-nL#Oz|5Y9*)y2{w&e_?Vo@CVQ?6IR&!+@ z^`j-A@2h~VHqyhd>1n-D#j`3ao@A=1+aGHc4nc#mqHkV1cJ2%v6v%sKw)cI$Pmox> zWnn9WgY}IO=L!4JlCDu28rM^lVS(xhSyR*RH`i@!F7&~sC~1%CQ2zD@Nu{n>#T>Kx zQrPQR>hkM5ajK%Mt=qCsyWE@q&^g%atrT^VftfkgI?1vnl}XSbk2ju!9D>?u)A!~s zmQF2b<3Z~1=cJ&pOI<0BGmpctUs;vQ^RIk1&W^PoRQgC3oSm7GxYkG2Gl=T;_4Tcf z5}U0Ptkf&8j%XuI*tWBr5v%+Z{H~xxw&-nNc$hx)Ew)dvddxb45i3 z=ObC#?aa&_c^03Smlnh_%Z+zd*;OuS?n{c4cN4Ed_!XKpL?edO{54RT@Mj6PKV_~n zsqC^YE-oWxObVQzi>~!c&J8?59V@WwHQ4tJ=QQ-|y(K+|3i9ZejLvlJ+zs@s)^6j; z`lXJ-rXQS#WA8L4eMKr(mHgN~5xY%E6EzL-_A&B=TJem|^9q*m=;+Lf1hq=fC9GTq z&IA5wne&ZYt5%#U0$54oyJEb_r7rcX>8Jh5*Ru-!9~^9)<#m)6zS{l z|AoWR$@uv(NyN5G^u!5mm)6J=*F!Wm_?)ggOL0mqmk$@HO{JIod#b#><`EZP8V)3v zpgZm=#1}bTJ^W&jYA&#gOYD4Rho2wqjNpW3t8O!0Y_5uS)HjU`9S5ZRw_l#7CsUPK zO>PXC-n$^pTA1x=Rv&UeB1`I&-`) 
z;CPR67PFXR`dj0N7d@8ehH9suJ=JahX@v6H1Kk<&`bQ_S5&1qdj zbK@4ZQJ-0OXZ)wXUv%wK?1qE&6li0d`{+zu5ZbNj+VP87F;AY9%y?~M6;CdEyF|VE z_2tcxZtXJXoNi<0e~;BtCO}|Yo0iEqSB$%=wYb1VH&fH+i~Z%K1Kiy2M~n{7T-sYZ z^T1*B%hHgSkdRQbQQD&`gL}@-a0qsmPM0{0K2O)!L1Bt}aq_ipxvN7?=<}YJy>{YKT|(Hjdj z)6LBPPd<2BZ`nEOe^TV6oA~VeaLVPZbu`24KP!gpxc7D%E7X#q+sd13{|Z&$dD-%8 zuNTl7Gl|q#e0_-6BShA0w^Pj1&cd1b(JU_j+1i+cGWQ>!bV#k!-Mnd&YK+Myvb(!G ztNYX|L{_a=>Yc+5k8JYUa2I=68?OjyAG^aYi!&L_PCg?iR}0Juw3Q`r9SCXN#%(MT zuD{!IF1!61JK-^(*TVIEU?91n{PxzprW%)kunFv`GfYv})XgquYdL6Jx#Z|_o<8f$ z-)c55?yQXn*L)FP-c*#{+U|@>Ab5${uFIv{t7CMN6!*aR4JD~dlDSy0m(JEWy=o$+>w?v)ats11}2JL$qkFYQ{RZ| zYD%~(7Bl_+U=UHl@B{TQ)Mwxtd`ZqUt0CdC2wi#KoT4Ui>{=H9WV4gT+v@6-*~%3M zGoPHCoPylQG_QmefTXC7XKF2(k}37IwMc)DeE1DZZpcbruYLdiv6R;`_Qz@Q@3rbz zdX=8~k}Zsszw`5`+gc3DlPOjC$-6@NE^5XUxyeoTlq0Hmq}~sN(zdW!9=A|Yax^=x z&-3T{zAYbB#)|fB8praSI_2H6qtk5Q%1E6dQhdISReq4INSMx*D+$vbmLZ({U3RB~ zjy|YL@d>J{A5ph+`r|y|8EQHCZx;K_UMidrgOh={m|mhF@#+>>UlS&$tEV*KEQJZ^ z&m+IG( zy{D3D_t{eXu*)a%PbZ~zJI)Jk6cQF5RB!;ukVwfR#Q`eLc3p4yfilD@k!p}xae=HP zXPU228qZlm3VkxZ$k=fVqy)D#!hs^~Fw*>^yR>lrqiWWfB1a0lh;vCKD|$w z_$D-y?&O$8sjq*hYV|43cX}Ss22h!x6!Hc|SBnbhmW8pwo4BB4g1)tmEkg*^*tLdS`8f_wTL7KulxLZ}!EQFTm5I{HWt0R93k$2(%=~g1tqh%GgMFtwJw5I1 zU-(F96p=!c2%u`XxT2Y^b;M~xFKuW``Ht9=v)Q_KBc=E5G0RpaoM@5I(=4_(8Jq$6 z-N*Fj(&dzmNe|h7WEA%BrY4B5O&Tj6ihjftme1JiImHuY8hXM(T+m8m6HPMBd0Kg& zc}Bq*3GtB_dqKkrg4?J``!6mozFxTwoc~!cmF1;y0ezhccNcf}GS%rfcXt8< zT<01iknEkS&ND3+??nm9-+Ox8&bIx<+46+!Y~`iG2LYW_!RA&I%rVpuQ^+3{7WSNV zKy7YQIPTebiYMRH;oE<=`T;3y!<>;w#JuNwBz@kjN5R3(mq+)Vu)l(LI~INcw*9;M zWm^Aa39&p~1(0{G|J`^`xo*^zRKw1#FK+^P>3ZCr{pv$6`RjwQPEYb%)Ouw2)Wx~J zdxE@D4toRMVcA><)W$R+Xtac0AN}y*gJ9*N>QY#9b2I8E`|8(yR*4d<73(|Yy9yq{ zZMchZNsYvcWFfC*_nCF)L2m|;-R%|_&Ek0-dnG>dZBG?60J<4Tit$ukK6L0%q@;(d zre@fpnYnp#OQe0`Yu|})%rmEiCd+<(1OW|^avE#Px%BE*OWH+To+}{M!-o%}ZPLwR zJTzURqN2L-qVdkcv`KT(8^g*VHZ6Di0I>&|OtSyUXK|Oy(DwaR{YywIk;Z#E(2O`N zA$(k1=UnzNF_{TPge3X6LauTDJN4w4RkWnXJQ6_8!u|&_F@_-EgN9l4rq>bNSjd)a 
zqt{(sT?q*Z{JI6)x>k+PZVK(E&QrhR_5?+~;_%l;p7?CZBon}_p*{!H%JcfS#Wh?z zBn1w3c6G42BS%!C=;)b`p5G(gt8i9h0bmUbUa&Iyuu&B%DR{_}@Ng%j^4T6YrQ*Y4 zm)~t+V?$;eL3XXRdHCqj=lF9$r+qe@J$rU(x=yezT(EGVcqf;l-is@D7sF8CdR>O1 zUfq^+38PLuj2tUtm+vRZxiQLmxx!N<-Zi{{HMImKO&P@KeUcSRpV& zHBpXrPC2QlQ9@7HajcDCn-u|z*iM_yf+%qpd!G$k1F_ZSpAIBG7K)ABxHk=GH%Q{o zV7V7Q@U znMZ`Hv8!3Us?>d*F>8BG|0j=oa(4JPz7nSfvoAp^bG-9S1-lp3^f}Kx^W;WlQ~U5G z=s~rqwV`}z7a3X#qsCbSU#o4rlwXjnyI~jBLnF?gva#9aD=_^R z9_H4M>=$PW9AaHj6&*&6lK2|-sp%Oj$KdZDpGVx%4BW<#mHGsVSbe#$^t!SGsW)8E zFq8F4mH#Jm7Nk|7OdnF;&5f+IbaX1q7i49xiRWFq>mL+!vEgmH-`%_IIhW5N9wlZ@ zpVcV+WZso+Is09clI75Lns}}n*VZ zvcG-zwq=U$33x}hlXDRZsM&c)jX!zmtD0r~NY&4>3mQB$&kjsE%IHj36swyYdzjPG zQe0M6cG19v;M3c;Z-4db)z6<&@Ll$z!!E=G)*ZST- zD-4;2U{es2cqFx{lA=Y~ezJnXP27mvWI2InHL?u48yW=J*#`#jrn>shojV&hY!I>QamS_Nol)Jck0O5z=;#&O4xQ+Dnq zzO*2Buu8a=El$^=Za7WJByZL8VJMZ8oGR@01J_%2nX}QUW zdzQnOycbK8 zG>an;-L7T-0Z`)2KVO>#QFPCi+pq7?yNQv`MSCr~oX8&3k@_l^55NUykK`~o=I@f#q`%@b)f636jsH}X+ zNdq~nfAkIwjZ%c@HDl0S=!^s7r@kHovQz?1s19O_dHQs8T#M#e`mdjkGz@3J*C87} ze*Czt)XA$MJ397f2O^x{)5k>Up}*U;?pr8WxR-nP#bM-DWhO@S6CL!SHw;{AEr?mS4f zAodb}zxr(Od8K<~%kIOMUV5+DwkgDSe?GV1wM|3P`~?BE_w1WxOsYHWD*35|j-lsl zw`cN1;DYoX`idLLX~{}4k_c+>?LUu?T>iUZjrYMFU01(<*i+05DY<`i6k!|u=+R}3 z@38mj#HZZG;2{(*sketzb?6-&V!^0qx?77N$F=yr=( zw}DJDqOY)h`zJ(xQnV{%*y|%gQj1eQBxCdjg4sdtGjZmPjEs`rU^?&es*Z%{!BMBpM9|otY0<6K*>;KrBT?MPR@TUS)Y-La(Cz zl%mGPFIIsC+I~>xsAV(v;ltU$T7u=jRq;~b5nmFJi{_*XaA%7oBSXTUKMs21XEh!k z!kK~KIfjeGwqL${d0}>P1)YIGD5?d%Hj#$zKYdAYP&66uz2!|n9-$*oAbV{L6|o;U zPksaXeihOJSF|%FuoAE8uMS!P6tStZntbP&c2rQ%uKk4x?h(tiZQC~6FX`=Q{ou={ z*S1rpqmqeBDVpV|xpGUY#=5m@(c~dY`@OdZW~m^fOd1S+}c8cj(N$rkC19!6|sjD>C@5cx=qtY*yWEu|v$~o#;L%H(jjIubG+g##kSl49hGp zhX$UzI>8oFo{TMQ>1|sL>So^x6n_stq;3DBDG{HGQY8$@8$@I$=Y^uH-)?O#ysVTY}dd##yV}-s`IaRe8_Z+^t za$%zM?L;#Uz|zvv;578V+B-Vs7cUPm_iJZgI*qLy)Gb5zq^r=b7h)Zc9orFOpsIrd zg{U-YnWq96;v~rK&?-FKmnV;1H?cxfuo$1|dz;#5`=Fz}(g8@5rjf>3DPR1aar^chlR8E-r)pcZljep& z>YrK-z4#}6`=kD$i8@HWaqlE~@%LvXUlOXMrb)?-9|x$=VD8vZ%tfApWb;$ayGv%! 
z5s1VgjfyHqGYDC_cF0Ap2FGiz@&nFEZRJM<_*hSQsY<*I#qaJ;E?zdSl}RliXQ1Rr zaTir>fuTgKOTopP&8XRo%*?!0=c!j4LDejBw9sRj9(bfwJ9PY95UYcTeNrF9t?d-l z_4k%e|F&RzTt_p~(+`>Pow-aYLKCdmVFCh=^qp-CyLN%8%>S%rV_H$OrkL+d_@mRL zHSU#LQc6%zkb`6Jk&@)YiVzcTXc=#@FE7(5OSN5@H&dk0Bv=74YCzJbq~xVpN^$h-ru9%)WFNcFpa|ALZ|lOv~B9>agV08R-9 zo-{eeCZ!Ie3r6w7(H85`rfEE}gHo8hv+Dl+Ey%MkU!I!rIu8gh#2oPH(w2Om}9H0T*jBm6hH9iJhzTng>XlFIZ-AjJ<&^$Yq!Qc{M?E?OS@ z-hDw|jY>kz3~I5T`<@Kg_o@ATlz&pPu!YgDg_1v;K0PKCSbUbD{gh=Wl{Tm({*(HS z+9+~fx*d89QfCOkaJGfVF*=c6Cc2m-NtKTyvYC`5?s#~{fvQN;hW zU3Rp(Kpvuzi6>C3s$ikkA%&+R43u=8e7SwWYTa$T>T_Y)9P(kwOqqt2k^%w~$m{v8 zrQIUD1#E%L$7*erh~9<9TIF3EyPYSxibsN{+fzxD;;0wN`&xTyJ9hGqyQNBajz`5( zcTi5~1NZs6#^%msO`PCRkr!Z*vZ>51u=Z*F>yaa09PxQ1cns`#utq$jF1y3&TvH^; zHQV2^Ioas&?&gc1*3f7^cC5Ngk(LG_i+c3vV2p0vmrQ-D94(?3*fkNyImG*jO9@3n zQ!pqnFl^GZy~3lkECGe`#}7g^%61i!#6r^=iFNx11u4O*;CeSJ%MN-Fsp{wMF65A% zl=o&amgw|)&i_cVYDpcm9<|hGlKqM4q0W9(eVD z)n#D&#+XcC_G&^qHZVTCd%$fdr1Oma@H@@eTBhAzP(G0`Oul)qS+~5rV3#Ifb}?s3 zIheheANcG<+U&hQC7$ue#u8yt+R51#zi2#S0)49#` znkq=p?6UKy$58=mXny;s$VgZWH^zcV8RnV>*h@bYXIlcY8Vi$hnNA?}p)r*gl{gjS zwY;6eEyceuH^;+u3vvf!>S0tUCkT$s+Lutwtv^3DzN0O#qy)fQLZZ0XOMe19!J-wB zip73%$Hm8I$UJbuk|WoLkm5k<(VWWFL0cuit*s6H^iop7JDnzjamOVHpcFr76YZIn z?Jou)84ax6*0+lgawUQC*gXG!3pe!Sk=io@PY7DD$F19@K8j7=VDV0ueZ+Z|Kg3B> zwAZpAR`JPE3t$97UDwR$z?zo;3E|U#&aLh$n-AZqsi{H7TxjSns3)zA+@sjYqMZkHWUgX8&x7Gn^Gt1*dZ_com4`!cwJ ztnGwuoV#%&k90s#Fsp5)GXLDf_5z0C_Dv6C#N-qe3raN=ne=H@FY{YuIqIVJOtUwm znUJH;Cm2%ge1nvfj$ey4u7nJaS`QReVBKyk6p^R&-L&uzHjs;`O(8pJ%yjE*lR?|D z+2_y;0&oO1ND-ST>W90CFhAk^>L;J#oBomI{@f~`_xSPS!or`U0<0?8Z8e&FH48Wl zeKm`+lRElaoAw-i_hZ)_S7_Xm8sCWFe~)L0@SW(Vz2V?T?E)dW`Hvs(p1b8GyXf@w z6>sp_?h;=6L-{gj!XgbqKq> zh=Pf-{}X1AIxasXVb(1bP#UI#MuA)MbsQin;@sj6Tif5u`vI#&>ygF87J#NS({SV= zcaQ2kV78KV%kM6Y1P=3aH8aXJNX)A|5Nwlo4UkLm_k1srLZdO;ySEp-H z{lOw7G3{~U>R0b^N-{`}&Tz<@AFPia`SEE^!F8 zcxGLr#w1`+rd7dqfe=R#@Bqn6fN0Y@=FD_Db(IFqc1aAQ^fFLB#7Om8&Ur2WiUDs@ zcCnUr8{l}8%dmrykyDBRMhliSjbB3zf|f7u8e6Ouri)O0sU)J?$s95Wt-7P!t?-k1 
z894sY4TqE@a}dKpQp;MR*DI4reIScKk|P|TJ-TS<{xy`KE!54(&YptOZ`Yjs39@;d zK@VX6iNuDwy3efH`kaqak_(){6}r2M#PSMXxPOvysgTbB2b+oEC`r;e2_h9`e3Thd zgoVgcxSqaG%QWM-$qoJe@o=)Pr`#=r^=CN*<1U%nzZW=i#8|tYyWhLhA6rawYSO=i zZJkbXQj{{IB&=f;+PPOtkf(U-Y?7k5xw#(@q`($zW^QgSn0+5QrqLzYpqHk(+5+w6 zZcgMkxSh}=MJsbdY%WwM@|i1F2B1z44mxsahSd~u)=F86wH=(ZhydS>M1v?e0Zo&N zr9YiA@t>{;5yJFGnF)3A>3AaFDTnQvaU5reyVh~H+-WOzbXmxeh`E#c&lkM@F&~CoFVpvG&EmBT-8`gLWENdGM=C zNb|_5N-B1(J@iVEqGTn`WwcD$IXGm4Z+xrz$7RE|x$SS@S8BQDYd-ldn7yXq8qXtx zT^g!xe0$+Y= zpKL);PDv*w4ln_Bqa*deY`2utL}dS_Pdxz~^>ncb#i1OsUYp?Y>Yoebm2XVsfD&vd(lMqCIgv8d~*r>B=A-BJiy4cWCp zVpb1W1_~d`jvY+^Ua(z`qL(uL_2vuYYBtfUgeZpg6fh536-p8_&9J~!2kb30dVL!m zvl}4c5CK{5dZEWd7#@U8FF?%;khzTR%7$$$e@C+_Ys2dz1G;z!of`k7z^SIA^CUR9 zU1JBsbS+<&HA)pE2Po8ke*R1`tXzio#&NQz8`pw@S{p7JnOGinRI{|3cjR+Y1FWe~ zdAd+5yuH2Ql$;%HodlZeMk>oa7|uI1tk^BcJNe^3R!WdqWQ2U%ZdWLSxH{F|{Ue@f z`R1ScaEr*`Nc9$%EkGwGp5e6cbhr2tdcpmZz!PhS* zs;6nz|Ln|nx}0QgH;B*`)GH#A4dXq4kbDpl96J% z`e*plYgjaWV{h-{zN^Da?4whKAnFk_WH_Ao75K<-5p@k_U(znx)rRmuD=nPQQdpR1 zv-x*Z?Uj^m)i=vdSR_xFIXj%{?GYV7nyDw3euVIv(l0YbJjUl7eZijymLL< z3d%mwX)JU837drc(SjXuIc!hrzr%9N#>VDi>$B-Ylm!3gC?jkG6uP&B?!OQtf6H@T zdQL-2ODos3-p|huO}QdrRX!SZXFYQBL$Ggj{gTJ77_R=tFn(Rm$VzdoOzoN1xc=5u z4S810b2bQO*sLa$lnfo!nZKA&lPB@Sjo~8y#R=P5Go&SUF&@rV*44eaKI>C zyYd;FnhpQ_!z1l8tc>FUwEGvClaGFB6B%zi)LSZM`eFB^?{^~x2PNn%-RL1Q@M#uF zjie_R!)HX*@TS(79!+g?5*3kG@ zF;HmYfGA)NfCdYXe1w=&X5s6foR4aLqp<-voC-`A5fK6F%9gEL(UO`Z1pmllow zQfG0>_uJ4A)9&5FMO)}%N8tmjU9eCqC|q|cVsLVDa-5{x0fIs$;h4!BJRWo^2$X7? 
zpL=qpa$V+;r}f}S8+>kl_TKdRiV%Y?#90SOz4+=0T+6*3Eytd1YZB#x6Cfh262Gt& zac}=>Ymwd(2YK4e} zi;J^9zWRB51p`m=%l56UO@30tgzs{p?6QzgNrb-n^a7{dopsJP+pQJl<#$rf$jC5n z*+BdR=;*YJ%MXQ_ z{Ny29_C1I`vF1;FH;4y=y8+vU~`FJ~xvWg2o`RMK~;2>=1%)Vfk zl9EDwgx`hYhb4m2fawYKr7%LVN=!5{G10bJ{oSrtlvdZ)db)!@9!D6!Ak1*Gt62Eh zu^B?X;pRqV%_31M?n;5fFNGlwxr8oKz7<`ywau)v%8-0t3(*bATHd6Pa^QPIzF}oJ zP79@J$_~LMk09;Az*W*326#lh(6Zr8PXQQ*ESsh%vW4-GS?-y>CT^?oo-Cg*0Dvm5sCxg&Eg{+@(&T!@7fl z1tK~yK;})0bHCv9rIIk6G-8ZOwS%Ibs$MfP9=Lhq#*L+AU#_(wI&qgePA@L>&2#!6 z@XC0`Hyvb!zKIZXL>!~*l^iWJ1bL{m+qRj8s0=@9x2|v)4SRLzLBj);HRgLxbXn0E zX=-Z9$q}*`5a<90+InN{xkG4l0hlPmx@L4eKv7aVXWxcG$O_vw)m>cC(;anYLXHX9 ze$9K+yWZa3PVzH#p4a98OGn1n|E{qXsiTXntdyc7|4w=K?3Cv#3-3(lakH=J8@NFs zCyj#htG*<{T+e|j zM$9JA(WMxamyqIsmI#OW_3!t~L{6NT10aPFAgh~goQ28JjGs70>gvw{V_XJ((7=LA z7KU?AOpC#;1;L2luwoH<2N*rk1tlHe2#c%9s~%52nqOq+@k3W&x7db3gfr_0G7Ik!k`ug z?GfhW<&`tYM;8#5u5EjSt^i`aWpuo5^G}#wZdjr8sXZFPbGPt4C7AsO-a+vLD2paj zB_rxhU6PmHzdyf&GOYIBDCc(M-m1>ta{i`Z(N;?AlaJ?aV}6JMwn#0zV4 zrd|InVqZD`3pM`#{+-QkqjDf57Cu422ACo^4)F8WcamRFD@-%u8}32OunHHCofdP!uYFo{B|3?h4|Rn-w@Fv8yxcRA(@(tL8VXc>kl>_ zM5B3S{|*)w>%+Ut_HFz64@xp{(!W(%x^CqMJ2g|7IUrIbHg5Xox3A9x!NCPD03}|T zwms~95gmd#bRy^xak6uj-k)jo$uUIy{Lg{A|0$68Km9q=KPRfWI5;5Z^Fqzw;!1)H z1(|iSEhoNg1d)a~&*$w07DpOr?^Z{f4qoRQaBAnacO|RD<1cv-iCMKb6RX7#3`>Na z#~%n9s4I*xQ$hVfXAb%~2M0&M?jy$UACM`2P{%IBNS33s4oSII7elHZ&~7?ydH){^ z^}uXNZh^6Urgj}R|9Gof-!bY)2uumDE=>_XffRY= z%9Y()ZX=lOs&6&**Yi=)1xN1@%451_#uL&3GT8_{eCT37AUA+;!DUP`#*AmaB|}V} zb#H|PQe)^v1B;%rVp1H)4Q6WoLNkGKI{saUxb5WRvbinaQq+=k^32I7)U=OknsMW~ z?e4Imkl!HpV2l@>BF~!l$dMfIb$b~usyW)MnBj#9iEOxX-SqC-O& zfM%yVP@gxJo_goE>Hb8<(ulKRf#>N(E6=(Zz&+*Cd~3E^s9~`|u3qM7X@Xd|iFL zgG)v~bqcI*y7Q+0InT%~Cqts6k=ZMS zj%FTELW+SiOzd!~{7WIHb%Mq?iBlY+U{Ly!L$(RhH1p1#zu?nsm;AAN-@YY`(h+T? 
zxncn+DK8u^VxU5d-w3||2ZFERBAJ+ikiuUyzHyn=_X=hT(wgpGT#CX9jhZY#DfyAZ& zA6QYGF`a(&-1LjoX`ld_L#OEo*t~*<`0G@g`peML=T=fe**-p#(CPa$KvpA9F~W{!S@eGGeWT?T`LJ_Md#x(bYgoHjVT4O3>^dqNy)j^peBJevfzh1U) zv-rDp7ikHLK0>qtYAYdn!#)Nkd~c~4-h9H~65~EygXJ@D84Sf3rs#GdITibQ9fXF7 z`hP?o0Iy4CHULF0*Axa39vYd34l&ZhW7o~17xSn57K`j=@mde z2FDIvi4-!ag*yPVs_h^Iux|mh2gwi{M#H^bk&6nNKh|5h0wEi-*uEq@55WZ}yGs%XCOKP_ZTU>?c0-*%i?!(w7vqA$T~a-ye}$`mF|T2~Sf< zJ`%ff(^=w0r?kJ{pg6bcJ0&J0l!58OrX3^YRY8iwx=p77Za{0!L3f=s5D!&BYFO(Z z^SGxLpbU}&fC0zK9wsKIzBhD+hP`NWFQZye{Gh1Lj!D&|Mv`NJF=TwIm-TPlWO zoVu=66Q>zlyJOS8E4V|?eIyG<3`)~7&LHvFli~QFWtb1~oD08u_uvctf{AqTy$4pW zhh0>H-be*TtqSY{YTpVTw}B}_)^<)g=3k^ZZp@n;{~%3!n4bJnoH{>XOMzL>34R~& zS)_izB0Ol~_U+rQ+~OgJcA!pBmF-}kC*njoCa*{6Nl3_V_$s_%0*syJte&n|x}AxB8&5$-C}w@_k7LFW)JsK`?#DiEvZ>{0UDhiK1l! zS#4@+3V^QR_P?ov?kyJ;DG*uDk}Q396;-+B^XFx(-=Iqxs~cL)eNK7E0E0-oc?gJ^)_mSH*jGcGSD#|&>Uz{7w7lqNoY{tRdk zoR2Ipt$|K!t z#87}t{IzdYzBn~IDVVxLXqe$+347e;&7Fugd;4kRz$Wy+$!Cy#Og}vS2}}(K1K7s@ zbr-39JF9xVR?4Z83&7b3g$KE($<&%FBR%B{cxDPb_@D7~B50OK=C!g5$mk>HDns);4vQegvGXo>;)}Yg@45@z1AXDW#eqgmqJ!P zqxnB~cK7@hk*9-9ksH)^6J6g!rn^#x2y<3DeKbajbM4w;_D1dkhXYd@8XbL#j7+87vPC_dZque6e-t%Yn8LiiUA4)` zbz7An2G9Sb`v&*#k1?4jIaGDny;kNAru~_uv7Ed`3|*uw($GkV=Al z`J&@if0e!B7#PJvq+zC~SHrK>^lYOs31gLM_htUS9gK35X7i(t0O@FaMN6E4vMgX& zA%5u47(5UtX}K@1xllHzS>2cpJ=M8w1UPD@AwS=Cl8xB15Pd`o#^1Z(M8V`0stN2z z!Qd7T9$c;7=Tx-PPM~Q#&Nu==sCvrq6l&zuJy?hxv`w9Tu*X%ABueRz;DIaxX^y z^kKphOVN%%z#AQc51VkF;|+ggIzk);ydoa?)p`swAjn9#U6`rzi;A4l*$-yN zpl#>B{)CD=o)agCBoAd~-!V&gGKt?b+I7RIK15*6jQZn=+f5uVV~cK+zA{t1t!P_n zN=}(;s;a(#ixP)}04WuMHfPS9p-`%Ds^;KRX}&^-9ITIRO4Pr{$RDgETaFhap{c2< z!AhX6-i$$yda@1Z9_HLtjE&=5#{79XGcRZDJV@&Eq@`L)?_W<#rMambWx0p8UQS+~ z$b?W`!R>p?-7eC8LW2_Ns{MHh?>=#7WIN#_M?S3?wbxuv>7u`>Y(u=4w4ao!Hq%Pd ztncpjKwZJ>F@8A7wmh|w{01!|%X?L+Dmpr@;I5EzHm0wxZKSHgov5OikdP2qO00Xz z<^ZrMv`$X5D8?Oe*_M%FP67uGRCR1-S`8)B{4G@AKv2&*qL(prDnnBY%Z=vh-@ndi zkUZ7YDh9v+H{Y1Pn&%@!9d1rR&W9ffmGu`)m%b#O3=v65gO4}wcdTZut?~hDjVvuw 
zBiF%J^v{V^))q;bxtR^o3oR!KEj|6Jr{DzeZ}oEi>UE4I&l_aB!>b7+>%M`fvM8@! zz{^*u^i`(0@f`?|~b(NCUKX?n_9{~SlVFDNrGtks5z|e6={p!MHr{Oa&?SxGR znJR&5@kFo?5)VQS7vwgYQ8=Q^N_@&o1AGX8D@H6p{WsImVW_MRN0@kY)`G$!=5i-H z@<&m;>9%c)#)I83^nk@s*t~&wbDPXeKdBamI^uaanYx7nf`Ur4pRoE+eK6Hq6CZNL z!~}u}fp+1@+D^fY`Gv=ai9u7uLXOo;^1*lHH%o`N#daLtMt&(urRm2ZQCJsyNKkMR zy`Fiv;pkaJL5Z|8j~_jn2X`y+w7lsWE|^BFb-2cEo$^~ClTx=5r)%wcio**5k`5Zb zW&fT{UlQg<0bXNcV?{EyI-cAAW{E`Ga{Cl5iFVD}*pnJk+aM?1z8y?MY}&JL#w--# zrj&Wsl8NuhyKb&-wz~hR)w9BTcK@mmAoKu<*d}i|?R~W1Ad7gMK~I*`yZd{5HqvM6 z$`Ws-IhTEsglZs{^tn507Qux^wL|Lar+g(H+xJ)!52ssnCT``=Sc<5Eqa&e`y?*`L zMva+x_seKYW?G!K`k>s^FTSV?X)P6yKtz&PH}{g+v8`0dGmqH*E-Zi(cwmS>RPp=x z`1pvYht}pOF9&hgoHCccxg)C*VU<&tmlqL%j(At8sEX8 zAwK=J&)A_}*?NC&w9Gd!Fwjss_)|Xn$&achJo;-#QVw@-O_cYRj=1m~eR=X3X#Fzr zDkUtZ*5ElS6$dyEAAWF)J(k07tJ1!gJf^2PV^D|as;Ol-( zda-Yqh2P297OzaKk++Su2Q?cIe^3Vm>c+{aPv$6daJ@O>x^2|=uC8NX`Zg;3_~~t0 z$d4d+C$4{oQXFpJTEpo3Z=lwVsLAjpwpe1CFBh;3lZK8^MMbn%6|~qV<$EN7Q>QzQ*d8kD| z`qJzG0jkW>&gI0I9Xf0dnZ!dgX)ZZwZma@rSO3(B^n>Tpe2HC+N^c*neY8*)X5>0K zWMNsqkKP3ajIV1DZ=xy9JxLPK$$QW0u=-b)5AU4*wW=K5rO8GP!8jniR(GHkyoN9U zatuyPJoyfHf7GJsO~K5+w|)kv;>*WeEDMxvkhwxVH*i|bOuZ9OR`<4SDE;hv4Ex=z zW>m8GKZgj3hT`Y#O%os!f8&Jr+ASNT9UTiu2M!;`utZ|rnqALYM*(}-Wj6uslrV}0 zwj|wuo{(^#O{yXxVW8!rp#&a7hi)pG3@EF2=-HydYrzoEVQl}ucC~gG7Nx}2Q3U=} z+vT8$z0%UsK!pn(R#`+5z*z?=N>sSF8jNkxE$3)~6oz0e)p~w97p{Jb{0=a_wz$K@ z;&^)?N>Pt6$HzNv@8pC5?hiFJDgU>Hgsyo6WTPoFI5-$RKTJ;Kf7JMQg+7j)5=A*d zcN6BV+y}-D#oU@#ukGsLG&^3xMnZFS8?on@O@NEPYqIYpKxb<#`|5iR z;tZ% z>0B|LV`I3D__UWQ2qu`OJaDvZo^HT%%8=IxVF)ry#9jbz0%F+KLM-`bGoyOR0{|ST z`bUo)Yq@k7{Zw061VI=JWCe*26Xi%-7aJaa5gEbeUK_|K*0}bV>6I(AWMgAvD6$^{ zh^tGKhj>gZzoVn0o?fI(yezC0uyYZ9xCY&D1LC8uYT{PF>Vcrd@cc}$F$X;-BcpE^ zC5(!Wjt&l1&AFKLnU2f}Un3kEmQ49hlLk?Wh0Z_w0IliB5YzB96j&nh=(jf~|E#9= z+!xpz;&~@&ErENF3!rNjIEs8E35O{j_3G!B-1hY0!`M&ePz7}!oki{zJaB-bc!T1H zxo{MzW!RyomlXet4^!t4QV9ZA90uqJ2{{Gy7xf$j6-jfD6HiC-RQR&=avgoy-1nyx zKg`@TJUyPJm!+75iC9=##tGZFx;mzmf4wrD)P9q6tlkq*|J&3`J$=YK{GQ+M_mnT1Xtk-$ 
z$(1}^{N8wXMBj-o_B*z^6@M%3#m9?5ygj}YEB7tEh>OOsj8I|~DK-FIHMBn$-ii6&ndyWz~dJa-oci@krgi<@^dQ{y>C=Z$s~~c^t83rrfOFi)KR?h2U>^wI7f(oX!jJC1bB38EJ%kqJ z38pCQFwrfy*e+B$5Fh`YBo}DzP(Xk?SwFFyI{WeQ(L;DEnL{T}3W#gRZMkmUD4#ob z)^qzKOn1v-H@A8rP1V%}vQV-8VBv4`$?tn}zAvTG(Jw(Za}(Q+J>j7p8yY&~?i~F< z7yuqs>k-&)>$-D%&jZt3Uv|?jlfL_~WPmr({=l9+Kg-HC(PkC3Yt^>9XTzKP zkI`j%NDjy*fEl;JFw=?1xdRi1Sf-$$P#(SB_oP>x-^p>RS#?SCpnxB)nnj8iNh!zFYo2AT){{&jFO*YO=KibKbvgrv>=)GyN5WbT zDIYmPhNilD0C!V&m?WqL-(Z8aR}tpEoWTzR#(bfm=LsEF4$xYS@l>M#_>INm2FxN) zdWq{0f4pWdoLJ@^LjDDrpsS){JVh&Q>_yLV3PzN*t=P3>$cvqlDo(1$CWAnGXYZ%N zkr-?*7DpTLXL`(dIlrSvO($;O2Z$VCwYF3ey~pn6WT*~%j0CxCuB!OKTX7dB8K~x) zznEuav}aBzR2HQJMd*?!z9J6TO9K7NLs{ER`q-Q~Nuoj?fh_jH;t|(D7UEPTF@3SE zdC(}!p_IIA4er1;sF!@k*tAiS-??ddfWB%ebVQK3t4r=mEB~3d4k2$f*v+wnAoI

IGKb``=C8E3Bq04F{j}#bKvY}76T!SOv$^;W+0>llqeAxPk%Nei zT&HTuyY;$PqF+l#&Eho%zmPRx#?f#KjF9BgF9Wl-ugA}~CQR}n3l{6#$dKHT;M14u zWlq1M(4TWk*Ohb%of>*AJOexS8vAsoujE6Oh3jk|p`lUkqi92qS~Dk)vyPk>58zk9 zn&Ovx1(G+xvII+U0~ICMfLl(VHZ3tRQF!E`ULk}b=l={*9yrqOprzz}b?jXFEh5D6 zUZ=La-@_AK9c$^_kl$Pr&yL(Cyx$auiyOSwQj%)jsv#46NPDiJsHg^9&GI}rsd+>b zXnbyNZs*?P?UE`b_o8O0OTSsOXY>3b5k#kG60O6+!e~3Z{)c=OqMYE(HdHrL|B^KQ zy{fENx=Av3VckW(_kBa^2 zX_Q{uOcpH~W^_0vMQZPeFRx;h3$)4>F zpbXn)JiO6Y*=D#YpCI}@e?BiHLaL1K8y0s7-p_wG`J8rY+_203_^iQowj!lSN^+0g zZgN;}OT_h>*ABFh6bz^)wXm_0*-%w9yH3#uQ9wcm*^!@0J~q<$BJGA=u$~VMIMa^>N`IzNgXchLvfN$PsX|S>yjgB8@V1qG#Yy|L@WUP zyGyP6;a;bU>X8i%9cB<@d!tD|JdBO!X}N|DBjZ%M-X80HtuHl#@_m)IJ$A`0?gjEuFC!AD^0|Lv zMjWq*k4{c3ku1Bh$6Utz*fC>esVlgp>0sFvn!w6tDy8YI~#y>6H23+9B%Jx;w-+v-=p@&8ejbme0S z>r}+c$Ns$GZ1krY8S4G|-Rct}xu{-g)K@ULMM9#dm)|Gr$5-!jTCnaNVqeo-EByHJ z<=KAX8(zyyJ?9@ElegvUq72%?WZEwPnn?T!YO7*jJ1Gdk=8B(w6zEgJH$BuhI(*Su z^@E&F*Tl;!X83!#smJnmp@UvA%HQSkZ-Cmsf_;Gh(ft0=UQ@i!8 z-I6C^_;>gmX-s?F$I{od4%4&B9Rp;YYf0o}D>jTn-#TmEKS^v*-KYWth zr3KLf;jW32Jm>-w5DdvZTi>azf%rgw;Y_C0n|pRMpoA;ln0R8i7Pj_re;%CYrfvVq zilM2cZL8WQQ`}npCX2KBs~qVJkutF_*T7&I_J*o^5{C@}*&Vl^lEomn*Cut`*5QKN z`@}!QJ@J|!{!H7OzDr8><+$B20P_=^Ll-Suw+_JA`C#t}*@B-l(VcQ#?A2~VBGT@; z6if6kE6vUhK-PG%ClcYVUUGA4pa3{xfI#!zaDe+Qh4FR{E_F6vy~pP@OQrO7QVkzEXW~14*8H z*&Y6VpsxE^*#pz&mus=$um*W~rbj8s_U`t2&(cEIQ=nLBZve5e&$V(n#BVD+Y`5b` z)W%j}@#(#Fc4cy50r~c1*v@;1}}vx<-?BHL_JAMJ2Y@lu9Vq@(9r#O z9qnuX;Pb};oT}T+XAfH$_qzJ^$5^pU^_xtM+nnYo|K*ooP!=v<#$R}Pq*Pf%oLX@B zoaOh|e`rgN%%(FRFDsapLdgIQp@Bb8FROSXSzPZCi}gcRZZMs=!P-sy;H`yOnFB~U zM9t1|N9-3B#=zBNz~rw*-&An)BDouZYMf%;Y{K7PmaqT$(C&vvNz7f72j@j||HGk#s!@HMt#uDN=%VB!~OGwXB%$`-;#l+aBc%7t5*| z{?EL;q$)MIUr<)bGR=P}xPqvBL*v-ot(k=Jy=v-4%$ozcP_UNN4cl@iJr6QMoD$QhP#&!)saRMyLH9p-oa+m(H#v+OJ9c9C%~j>xi)wG4>}JJKtpgMM;W*xUhsV#n{_ zFIDmO_xJw`J3dX$A@i=h{Luy{v66>tX!uH2@;jyf^I-kYxOxAoeeiESIw!M4uLiYY z!*C?um~j73xNSU`!((`B;6U~xdeHPiFmVp%&YN?JVp*M>*CgvrStn7s<8AN!L4@<- zdzd{_Za#Ya__uxgGFNHQrHD$d!E7w!qw8PbRkO4~AZ)8+IJ%-#4G-Kx<9>YLIudfK 
z)h-72?%y9gXb|9=J0wPzvOgmRTj3#~;wmjNnaH$z6x3{%-xUJo&hQ0lYdx$R((J;Q z{lF9=>Xl_&#s@}4QSCV3W~fC0qt{X=C}_?h^GmAh*}aq^mO28fWzrDlADYQJo^!=V zGx2u@T&knN&DXS z*bfL0hRF#4o1~;9xup<^$lz_zf~;9{k%d}#v~qB1aq;R`s>YB=tw6j>^TO4KsjI6G z9eSZKqfN<}xV2eo8X9JaZS|KBM80aQSJJLK6yGd;haUaHN!68=O0*lmISB5rPfux= zp^1_h(%sxc%a`HYD0k=08{uabTM?OPK=9hxJYjsk%Uk0FTccp<$`)^s8QCUJo#0IZ z{>YPSX=M<*f7&t%%YjB&edZfXc~?|qUanO=M$WuzmoELiVWF%`*n~8H?mMqj9xq1# z=Qleh1AXOu0DKFSkM=pHHm>w(UZSAv7 zm>K8e9D{VwJYsEDcox`^P!4P^=W>ua&X_f;7t2XCf zf14raJ4Ae3ul{#%hkU>|gBB1y|F9!O@^&%wSO&`d9d69Zo6+X+Ih%HO!vE^P&n-4k z8qr#%VwZlHPFn*n(e})`aQc*@>0e#;kPadsd+iKAEq#ZBfJ~a;I<;7GiUyyy|K)?6 zd7=cTTy;ySL|*b;{oeoP@%6Xn>3_z{7x`)iw^G;C%r32-VbSX4RJ3T%2kG~O z!+Ln&Q0t$9rSR&|OMUXhBCd=6(8~B*cB7&xnjxp_>gtA{+Nr<5UFpCynxXC+oSo^F zjkRQx$=%7y%1ZG$GCi1*JDO}%<{i1+ll~?{cHHhq6-mw&I`_yegMJq$l6&6JSZ|)y ze5~z|WuuONJlo$g($6+2cn-XEEYf92{3rMG9$#EtPYQc`omcvwn(qIxHT|D)cmK_t zCPB?wKl%3~N2-KL3!9_kPV^~6g$vY!GZZBerY1$+0N9xJ`&*b&g3>^uiM~mMFOtBc zmn41ivV8-*m;N{5YKFgr#jxp~yg@Y70t{lt$+lO40htLA7TEFFtij7WNb1?7c@6Rg z^rFD6%`huXi+okA@!j4Zexq2<=r>G2jxMhE;pMFikj2@)9&0E{ssLa zcDBqES?9#~Whb8xdKvh-%@WZ%z4PSBlbE@Ww_Sca|zIww17irsT&tR0Jp{ zk*1Xlt@h|JY3rI~afHo#5?`<<_8^V>k_$VVHP*XCPO7*tQOJX2RNzbw6OB9{xxBnQ zvnS+Z4wc={PGL1xxL?O=(0= zDK_c$DpmWKl@T?%AHIAUuGXvL(ay@!($eM=qfk`vr^$ZDfV8AL6%`d8=S_yN!#q1& zwbantCuyYgk^$S7k+1@W_Z|H0>sJ%eNPg`py=n92&OiTb1W#|xteG>bzI}W5+bEF$ zoD6u~=Cs@yS{MHu7#hb5ROVK~g{Gr(Rqh2(-T^QBCN9r-Xv{o(74jv7T&FgYJi@C= zMXo*lLttU*=Ne4?M=Bj4GzgzVrpF{o(8SBbTc4IQfOOGPZ0tGj$Cf|WNf&NdNmVf`+vxOTqj>VJ%vTgDe$M%i0A3IXV(S>|fDtP<1>T0r`;M)Zatv}s0 zmmPAyC~^)xka>yHx1{M6K+ets$rtfRb^lEnDHG>j7@MNHX$_Eu8X=ikdC2Y2@W~r^ zz)YgE^1K+H%SbJ)jYtmJt6dEm3x|6?E8Yl>)dL@M}US0H;g*MSg4t?(M8?|RbLqE}~6Bz)#V%jb6wsIy9@AJ4~n^lb5nXs@` zpqR8jAE&3g0ZxGbjl?R2Zk=EzknzyWfoxQHl#Bxo!k?3KP#y4iCea!OAO@{|l$lv| z2m=`|BKi9Dk9%PH+yp-2>UbGWXdCGEh~q9Xm5S=d@US6jP5W+{%Pv@2HEn8J1FbE5 zHaBq(h}C2nD)&&*<>MF+BBo1&7?C-Acd#cP8f;`6XuX^vCc<#rxocMk6^YB;FW77P zSCOW)0JH%iJhzWEGBPSEEUm7m9qJHk>FD@nuY(?QBgxfyw;uTTLl5@E7Z>8-R{Ar& 
zzoox>5&rV9kDO;qLq1q22iA8^%~Oo}0U**;RXsM&c?E<*I+%iJPqvzt0SkOLW(o%m z_y!4}9yyutHv|l&2^j5EIAh1{5j1BdLm;nW3jCc@zQ#mHn>;$;OFU>C~D4(KpTu)jItF%rCT7&;hL`Ae)==r#A*C1v##YmZTUD$q^}O*1@QtxAn5IFuZHN z;tO>@$w%KSKKkaKQ+IqSFJ}m>UR5Sn2eo%m-mdNjUAuKd&2so~7Zu_Dj&T8~?f~y= zjA#>}kioBMcI-L@gOd~8vngr-;QTtY&`J1C(+7Xf3tXx1rHSLmTQ(n@r^Dr8E)7bX zk{#70e13`_Eddp{>lkOCKnX_WWc{$-0=Vx7Xcfa=S?>|*xn4bv{%W*}6L@-F`yOR@)54^;w zP&5>HH;-=9)2C0dDURM7i-a^I6e319+B4>+f+*k2_UY%z{<@tzbtL!Z_SWgN+)>Ffhs-1&=le=?)MyDrd z6J{-3?x-)dCk{r)sNiA_6&oCusZXRo>fWDG?}0FI*H)?+qCnL>etYqPxHSu*@t>Kx zPi8ro*bI?20DL^IOBhR8X&9WY6P0*+ehpS%V2H$`z1lvW-iQMe!?k=Q-1~OHfFs3oyar^5*&&Inc zDIu?W+b*QLsjq*F1ma^~FCRJBQX&v>hDHSXb7l=;SgKc=IdUYsrTXjFRj*<~MCau>Q)ELk8JUyOVH|8F9`&18 z2goQWdX@@(bEJCS-hLxSh@p&aq&t3PoFx&85|am}EGJhGX3_DJX|-P?$Dnk(b}gm0 zEs2|To(Ra7fHjxh)qt>7Gb1g9u0hK5HVRq&pH8sEO-RlPT11f7yq?JFMm7X#X zB0eq~u5>^^|CHuhLBmf7y9efNh{mUeVl&Ql3-Ti8Wo&k1yLqz(-WU(R z=-oR;FoeP-1x+_iZcB)L@@FVHWH%{A#w(Vgm29R$2l?%8h3K8~xC)%*hs{3|Q+wl9 z-DR`8d!k(<+U3Z#_PW9pV$mezF_#!C(dnsH2;JY>s|r9AZrnnMO6&({TBSE`k)&L- zy?JBr&q%<=;PajsC9}RqoW0a#!+ve0J}nt_T#zA zi&8TeO_4Mi)l#(olfmNu&v@rQ00#Qj=tPE@tz;X=S+7RPpk_Uno+<}_+|rcx$u^E} zw6pJVat<=82SF;2Z(o-yCZ?~xX|}+PY-6C2_`BDY@lsw2(8awHIe>lgr^7)kJW40+ z;y|OV^?yWXg(H?-laM%g_KCL~NO{UQiaqksAicB%h>|x$!nxf(q6yb)-21}9SOy|EIuf-+(;fP&&(58%9Yg3N;Yu-<>tiNBB|-WI-i|d^iZAZA z3ec1qD+^;?ACKQDH@=p!UMrZQ@m4hFh{6*Vm(vJuM0&}d{Nm;Eihg1|J$|7TfoK-u zaJ%Kn67e3b=D;D2P2xbnU44M}G1RxvQy!MX1j9TBudUyMYKk+bGamCEZ{f~{Lmte( zkg#TwOlsxoT_gt)@jPf$-{j?d;y1E+-Q062o_2KhUi^(r|F;YrOsgsf6WK%Vb^RIo=9fh=`E~uq36dEO2 zyP%@T+7`2^1__f<C|^pFCbI~^PV$0!mei^NY#E9*Q$uE zQ)4fshi_g(lm8|jUF2qPI!j2X*oB^rANm$`m<}(gm^xM*PW}Tg0Fybn#|AlujP`r8 zaXHTz)`+Q{-8lr7R1HOs0|SDB9M=X)6?lMt1lxmN3R6#lJ|35*Mh>6O&6pmE8*aL{RO) zq6_!OZmDu;-fM4R_Q=15oZAH^t-5+C^U#9EINBg>0Lwjj^yt9vok;eD=lyPqZXTg4 zzf0W7FHI|ktt9t>5nB&c!`ayxhNH0L=jl(IK0P!fWTDv*sj(^wQu--^^XH94h8TRG za7mEcRhVzzPtUhaKtx0-xA0*mGcSOnOPPj--QI= zLJLPfr05bi*quY#&8?Yec-RUEgrbGPA|&Bpfp2S>S|bt8KT4rC5+m+pzP*@leB3{d 
zD{ze=_M~Z0ACbxXG;Wp*T`^Pl&F_9-nwW@3;$c=-aiWiB;|7&`xKe%$l?*;0!xPE8 z?ju+XAIH>yPZ!pv^g%`;_-DA` zBqjMDJ({XP$CL)TL(XH}*|K{1d&P=`G~dtrYgN@71~Rz%QFZ;SLhhda$V@CryRoQS zOJZHCR=DoIj-vT0%AmnlAmA-Ukx&#$c71}hj9|+|xVh!mA^ZC6kV{NT0?8(S4dC<< z6_|1d5_iF@%^C=Ai-|Qr7ofCqG&x z{8qMYKxQx!d+xYPM9PfiB0Uqt=Rp?f9&h>__b?WeQu8&?vHT8X->5TBK4WRRJgZX3 z@dN|FVOac0d;G{*0+c9;*RIVvrTZ$z8hUzPJsTNP!9C-%f0mL}5;_oVHB8Zd`31F3 zDda&9&u2=bf>y82M!~6ak1Z1(xS5KO z$$xEOx7I9M^CtW3yOn9T@FtH47oG*@5~Z+3$FGzkT@2vWMpy6z?h0j&9^wp;vIu}$z-IbL%ya7=i#(?Dgeu& zzHl%q#8Tv>8)#HQ1ws%cArxLY84a1KQFCrgT{+IL-LLjH<}Hlu+&SCF%<)ESRH`M{ z`gTeDiLkU!R#SY}NmaHg;k@{9Q;~t0t^_mSUNNu*((H}gnU7EfrB%)tt>WzwG;Lf) z)*NyYfRGkezSXvp;^J4pN8<2?Y06O)w)8%X9QeR%Ip{cm5ku^Gd+gAmCF2fwbPapI z_~-X6HoMHLeAQC9b$L;#t-dUnJt^PbXV)$cCuYGy;xmlMvfPyRK$!9o!7A@* zcANOa_^H!r0=yv-Wc^lij`L_(tr>5TtWNaguUf_WE*nLUzJwdHa}o%NI5jy=mz7%B4=b z(IaAz(L87lG@=~O+nXbHCPd*PjjwMGYctp_50?W43QlFAk3of1ATk zVo4ez z9{t3#Raa-paO5O=NPpms=yJ;4$;_PJCu}jcJVk9`#QPmPrfR%6YliP~PKqxUb_WYr zu3VYX*}k^U{A4ucY4@L1nj1gqQN>JOmzI%1#`hK*2WWoU%(b+9@w3#N9VB>aG0Prw zsvCG0k4>?0Q?_Cj*0gPA4}Kb#8Olq-Zb4}&K-|1Y^9Dc=gxEQWYITdsp$U>j(k3*i7%3If z7W9#09)J@Ai3Ltd_5|4u{W@li&FrSivg}KylQ}eMwoQ$*z7ZcEZyu4nwPBMOLi_mS zQ2R#lnNN}lH33?uKYrXtcgy-MTed)8!Gurviy>HYEt@Q5GMX$hYYfyGkMq1qg}z$K?_9q zwlNg!Mm{5-x{flMpDcfIAyd?oWXB$Lh5jrR%mu#w@dvsKSJ$R5o-N>+-w9mAWBU5j z_xIRfwJ|t+x5L_AJRTNMe=0t7A7SoGNhpeV*^ImydY~ZJRpLh`5y=~D{y>+ z3t$;p12>W_%e>d|TkAKHww2a)lG2|t#CO7n)<`+4i!+A+8cduR7_felk%vLSP%!g@m+lG0E9P*l{tckjrJ-3njDsgY*Z z6|A3_V^Y%A>-D>%KS&DyBq)GAjk8PX)6;i(sC-XQjr1i-&w7!aD&~!!NCy6 zEhcUl$65uLk~cur&=CVr33`jboUDo6KV;y*)FWa<@RXe$`9ahry+_%l0&P8hB|5n zjigI9$)fg$)V zV1fO65;&qR55 zBKDDy*+Od9T=&HD#H{nwWQhq0Vn8W1T@xqBTkaz5RKA`*v&Z<2s|d9m6r$37(Gr!M zB)d1f{)1GrRrh1Jmghe8dij%Umwj;ymv&z}#V>NIpZ)v}Z6aq4syaO)R(j3Gfb3n9 zhdewbz30IaY1>AZme*oNN%(2CwkPbC$j?@D6M*#+)z0hrm0i^ zrR_d%N-|UHCO@=mdviM@Q?DUbgJ)f?;CxFZug|gEK-7mm3&jZh(Cv>yTyr z>hh8{^1lB5?gyRGT<-qpR#C8iKO_e$V#PjMzh3agA?$<0xkN8GiM7XrzMhwRGwosd 
zsD(V6wQI{k8KxMrc9VTl8*UD~ZtQK?pJ%lpDfjHzvp}!Ix(7U35j1226e2|Yq!YCK zgfOUX0J8&u{~PbaHq!KW4vcYrx3yKn!BEd11BeUq?-?7U&Wk;Ds!#kIp})wm8P*l%?NTS_?K1M-8>KwC?R~R(4-Jda6#qfo>>7_Jo~2y}euDoX|y@{xd>$?AWm; z15qry%HZ_+ghlJ(t)g-|_!zAF9v7$6ANt+L8=ffWD3K;5)qKuqNqu$I&dkjBO?e+> z3bdev?k;8=dc+e?PX<`w>x#@h(h1Kp4BC0=B4Y$qHr`cZ0qmS*O6G-530>Pyoj0$G`W*Gj=HWkoQMld3ASw4! zW-%o+_AKved>%c(`1<&pc?%Yx9!;NQJW^$&yY;Q+55;|d%sud$(AK@(fCeioE9U+g zOqsGUD>Zd@W^u3Hy{i*GTvc*E^2&y{p1AG48C>1F9-(%^af3Y(vZp z`X5?FKdEte)P)PHSFJ)N>XY)4LtMy0|qQy3xCs{#xeK-AnnkPD|-Z zOpM2UZ|jrnsdp8##U^!hjCyi^zLxKn`t3gge1#mtk@xBMTYR&YHl+*72>2t28K#_l zmTEVu!brn0)4puBny>UvOi_@Iyb{+U~Lr>@aq^2!C8)l^(#N^?J?g!7NH%2C$U zMe^QQVRdio*Mc`y^MHL+f&OTIyvZM)vivljSHS_ z?5|fm=-r0?yThWR#fTG_&^Yi%=dSzS^9V#pvPGNgTQ(3Tl@6?+3?i|tdgWAdzKgLQ zj@g{(w|Ya9oS>uQm)ue%=x;M;zJX$j;tWg#KTc)L>aO$-&3dUk#%k)AyvZfomv<_g zmj346mwkhkT^Sh1PXaLHKYMm4Fi@jwd-@2311ed#oG)i?I7WN|2?XYQk(1L*X~lpkfvbV{&!zMzK27eC;8;j@kDP?C{)_z!|ztx5i5_`1M0T`mL?4 z%QQ}q7g=BrfXvv!*7nIG#oRT|S6Chxl2Q@Yh!U18=FZyd8;iTR&W_5xVKn`ezyA<9 z8KiA>kH+LP(fP=3LaDG{hvM2FZo|b__wGUXe1>7UgM+%~{fAZ-7K%4h^UkE*+;jBF z(&(@I%8uWMy^CN&ZZ5XBbLMoiXaM-AsjfB}*rjFHLsHz*`D(sz?~_9VIq|s~@;8^P zD?hx2I@?Swiw=stg98~)63%L5?FV(h-7sk}nl%d)f;pw!bNb_v5fKjS*1?Z6{OFWW zTOMg1@yo!<{4eGAj*R%xjo9qPV)zCGgMG1vy83`K&CT05X-w4;Y^xZr)3-;DueGs{ z_Re3UJC%cn6qaU$s#lZm~xE|zShz^OVQ>CrY)9(5)@ML zf??VAvD(_&Z)Fc?`ZH+MjExSD=$oDws!rI`*bI2h?$%{4^&#zSPtThzy+aGLHGkIW zEabWi7p{(fBYHczMGz2ZV%fU5Xsgc{c$l!5d0^&9v^t@oy8<+D>KndRCm^WLE1P%) z<2xNySoTtl%Du-qol8mz*j={C-NM3xvj~7?`SK7`wLxbn+>g&Px3QTtcI>gEM>o>3 z6Z})V7(U?aL&gG?8?xU(BBoQb{{`eNrw`n0gL8 zB_$=@KI4+M9_ce!9u*Nl;vT~@fZYAlQ&Z*k7$zqt!%taa`~2-&kAhpRd}kEih|X?( z@`pT6v%r$lX?P)WccW!kti< zJ9Vm$Wu&=kSHa3@@59b-fK3u`;zaGimb$Ke`q<2!d%Ng8Cd+(!7nj&sgRQV;){uVY zf1z#mK^pqluw00Xi%Up2?rEzjKapwyV`_+xSFTh6nS3rUZz{X;1bfF<$$^%Un|Q{Q z8yc#rmn|d7KWL7>_3U0-tN4}Y;yCV<;euU~LaN#9WsE|v?fSo;I8jd;9v>U~Enwj= zKO{KO3+Mpcq)!y^XcL}gv6hdrJ+8c3BNXYB<(LhPU%jkzUdEjD`&zAkxZ=v3(0vj+ z+Q(0x++6>zwFSx}csdD7jg33YL+{xAMhw?_$UwzUG?=xEQ(w(oPBK<&Yhb9Mp>a|8 
zhd0bpc=@}oUm(`1va+(nBKXJ`qoTf#n-iLOJRsoB>(`cMW-JA=D|2(d2Z_J&kk+i; z1&0Fz?5*t4ZJnHP2x>fba_k1!%)boma=|qHxB3+m$l}Kj9=vnwDf5p_?Xr^wtm|#C zV$~{Dba{gY`9$t5-cSJSQnn1JSdL(4?D+9B_HO6a~)UQ#|k0-01leU%@4Hz^sD zEjuMQov$tV>tBEW?vI2_N~(3&AL9-F#hd)U{@4r8R(o=@H5$(7{vs~eXzCopO9qR7 F{V%*`xFi4o literal 0 HcmV?d00001 diff --git a/crates/soapberry-zip/assets/rawzip-write-performance-comparison.png b/crates/soapberry-zip/assets/rawzip-write-performance-comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..bb121c22d18c61c584ece3be6fb333241a67901a GIT binary patch literal 43129 zcmce;2UJv9v@J?&s~{pMCXis60SS^s5KvTgAGiktp#GsvDLM8Hh8y5w(}J`ln-OrKpKNSl=Tv5I?_eqahJLi#WDQ5kGBJ z_mhdAJ&EV3|H99g3XlHd0D0dleP~55=9x80&83$tR)=tH4!An>DS@J`Tk71H{z{Iw z>{+v`tLyiHfe0bz$*MywbUC@X`L%8BHR>{t#hiF-hJVb=o76?pv5ML+FV2}Z#AHM| zTX&VtjpQ~ISa)_bG&FQ~SLm0ystXw4yHChq2Go=mRuNEP5{BEP!;&4_)`O5Obu`cI$^)wB3&)=7} z*Vd4BdVxVy=llr%ReXRQ1C@2_?Z z;u_4+;%*uqdowjMbn59T751W?4@svbRu=8%G+1vle8Wv_%hXd13qL?kQhZ$EGL<+v z8!AtY`?`gZai76%|93TEyzJtxYGut+-|KcCzy3@y$xej|?^V(FC6h-w^3(9`ijh0< z&4wGcN?BIEeqAw7Nt?9D&B+mU8cQ#onVnr8lw3(ERZvpW)EDER+a+qB9LTUd`^82& zzVUTmMUs0+lXrP}xsQ)ehQ1y{fL(C#!b zGBWeK^fB&~Yngs|eO;Zn%j7eag#F|z?;p%ePHvTYmXNTt^s8HT?F=2+dp%vCX;olQ zP{;z6aZXrJ&=j_iuuWGf_S67(8`eqW@|!!OUkbSx7bk0Y&-v||m~K*vTDORks0%+D z?=toMufP6!_3G8tF9jao?jCARYJ9fwb@oM^ki_qvwGJ}aBrRtS7v4&^b?a7u#Jt(> zpS}LfLX$Hy8G0`q(=;-$i}Rhv^=sa3uRq4YA=Cbf4SS6A<^ikN{I7BLty{P5+Vwd_ zHL0-M^F>~s8ubMoo%yjB>*BHiZfZ?b7NJ|T=fAtqakOKy!}6;Z7AgDP-!G2nmui(U zo7P9CWU3p49VQ<>awK?w64mNDJ2E$v6w5g{y$w5|y~3;F9iQv;k0>r{A_ed>82Hl{35DRj9XY z*>XWY#Pz7%_3O;SR%YUrW1TO;m}0P|E}GkhXXd-TR`kj)r0a%rW2sV-lRuPq(%?!o z9x`-{G^ePZI(4dAsiDMSSii)+D&0SntemQu)#Xhufm_ErJJxwoPs07TE%&SBtVKV6 z{}3Oq#R=adG}P3Gxw-YqT=U((#Gj+jT`zSfJbah21%-WDUtfRHwK*;>4srT`<@l%S z>k-Q~8;u+{o)`a=7#rK2mR0WB|KZWv4Vyn4Qa_w4cI_Ma9bbAfA88Rw(Q+SmLB{mV z?=H7-Iwt;S2p`#8Q3uKJe-^=-7k{chD{Mt$OZ#7VNS!X3aq_T4U$hi10ElZSFeVjhlb3R3djfG`#ajGHfV&PbP 
zfCT?;^C3!0T4=8fQhW>|W^ryRBrPRjs40oduBy6PCHA~kYkK>A-trr$rPi{Mn$BDe7}%@jHsnyQ9|HW7ndvT_+o-)qUip`|uLz+XB(eI9u)@QcReiliM_6!h^2!eF7mimWdLx$N zA3b~+r*`(tnZpC8eK)IDhjCvtDLC`!(W4wJCiZ3B1OIinR4(TcGM&E1Op8L4Fo(?{ zrI^LfR|49_g}l60ii$)W?j|J-h9A8W%4@*Wo$AY%FSYdZu7T%Vf?OC+HAB2!iTwrr zkyFdW=W^I9cJ_-cPR7tdW~kBo8b`#Ou)uU{|x8qd=0od04|LAQ(V<@LR- zgS(lSQp$IzdAv@osHm9#{&I56SUu|=mu4o5sQr%s$z>7G#ossAZU}CwA4pM83vIur z(s1E{Jtr%`g^Rdt&&k}O{Ra+kcu%7!v7d0x4%|(P>$liX)*1a;tm+|S>bsd$Vy-`^ zT0YynDKRwQjAkqeQ6`x$e@?<(YFT!9rpLUcs&Cy8U z@JbCizit9Uq}hbzMa^?CBdW0bb3gr_9H=p==waP{JL1yo+i%~$uX*I4yY-1{qVYlQ z!cNDF5&roNZPj;#wr@6x|4?7=Ihx-Fco-QSoj$QIFe@ud`|f~(U*PK&N@ z(6Xj6{@egl$elZPP%vwRt3?iMpl3UQH0G{W#&8)~YCxf&^h~2h^$;uf_3PJJb`b%6 z`SRuF1Ix?H*gvmt)2SM0Qv#_6w2!1zil6e3$OxRuuR{HwpB;^giOJ+i*LyL0pyRYub<+yGPRbr=i&1LK!o&bx$7ykw`<%yy>shg#4lE@7qkDt=P^Hx z{arjrzgxKW0?nn-j{J}yau|&M`&ojKXI*yx(qhAi=b(y?z8+9bhda)(VNF z)K3)(JzM#7JmO8LB&Uw z9rifr5RkvTQ(hd{q{$MQ}s-=iKyd9a}Q(?&b z<+k~mVXlhbsKTuo7n7`A`ri4|Fdp>VjRaG44VjK>s^h$=oeUY!O;bNhzib|;R;?>E zG?a)L?Dfg3%E}Mh1vt}gv)ooIpZ)n$+2BbDfFlr&n1wXurcsy2uTEfnE*o4kl2HNv zCS=*lB4WGe$dUB(T4fy*v(K{fCAIxWaZ@02I%N7xXw^Gu8=LRE&iSe4-dcA z_pT*H^$>}4YN__d^DFN;*N$nPPijKBJ;P+F7HU8CRl76MvMm!Zld|>wK{ADWis`MI z>vZEez-4ZOh6sT{M5)KZEH@`7?#Snb@ro=zTLHjn&LZASd3E;Q{;Hr@ujRSsW;GJ| zmEkTyvyUU~xo*=xuA!#nTeelx-U#`;X3ZKFe$}Wm($cOjE>&)`SUkWeK^*Vr&z~!c zgOcoCi_h1Z3fg_I)zo)6+tubeP#wafS5o^}tPofhDCGS`o3aE!RkkZy|E%PjvjH!X znXJU=K9#!I&7IoO;1j5m|6R6J30ngdHHmMigE?jAo^J4uGDkEk2(r%veycXuw4W#@xqXi?#STh9$@`w&@ERqu^`cEk7#saAK0`E; z)vn3QFBs8P(@9DFkVSvDj0T?Q;dbr%F+Sc6SP`c0I`rl)BW`N0g9=h)b?yQ3TRO57 zwUUz3jKD4yiE=X|qkbHh&;9pL#D<%zYb)Y;|Nb*6bQ?EzVB;Z9-UqOE7hSu;&K{ec zosD)#F-my!<;}Hs@7|@n;aaLz|O!W+k7o* zWNI*M&P2^Yq|=K}E2H6Z_wX2Q%aXkI?F~LwKHFi5B09Pti7=yF2pgGt!>AN3;o%Bk z2yTIzhGt@F%CftB38fPa!aQyUw(QByqJM(Qr~a8M8*}tkAn|x<|G+FaG(X5s@AD@R za#Xga(f?Jl#E+j~gutc<{HLtRf17ss_W)Wqk?ASRhmIa?l&wK7&tWwb)2k z*~O$Lhmp^XEjK@>B)-8-I`dfZ*Q*WeB5u;_y)R9Cdwb!+1(YfRD(9N|Qm(wYvqkjr z81S#H!ye-=ImY)DlZIQ;!qV2BT1Vt06i)P97t~p;6o%djrRH>X$$(clakWEG^W0xW 
zn`oZUwPxyBCO9Ja)py9uJa?YB9I@7*`1)B6tFJ}R6_dmqN5W@J)Zg*5si`2FhH@jO7!s1C=@c-S5&Z4{8sHceY$n`=1;|TgFCo&z%M;TI{GsG zio#IZX>sPK@t95hkM#y_qxrvo|Hj7cy4m!%1;bXU-Nd&&d-kk06#+dwNdK2e1FD4& z1MN;8c{#a01AQuCm!GdTAn`5zd`m&Znf^nOF`FOkTU;S_wd1k-*0YUW03kiH>Y0CWEfjpD^EI8;kl&CN}alI zK9*8DaB=Ww?`tuqG56)UK@Sg)t4rxM-dTRktfc}c7Jt5W>h}1BwBB#=*Vm0SPc(jk zB6_s%9B$x66R9HWPEpbuG7aO`{iH>p5gvo`6DGCc7qBUrg;FbLnWNObs&5V3i{e!6 zHn$(P`gXB_yJ_WXpl-QCm*>J&(wnkhU6$`n_1H=sTHSIj*xXx`v!3?82E*WsoBl<> zq&9BF_sh{&sc2(AYA7|5)}aci?@$eO`tk9UXFocvqhDU(UZuI~nwz{=!D9QdiUA9jfV?88 zo8gQ?Kj|yFy3q{ZdR}=CaA$=&f9vb}lht-HGrPWLb+Y=L{p$MFi#$c?;!a~<`vN4p z(loQMzm8q)Al4lF$@9{sC$1Z>`vu{?N4!nc4ZA+|UEyMwn%kmkqWl&qtRcN-mc9oF zi%3Cp9KPw9Ob~F?3dktOH`-mC`8ms)=~~z3CI=z}OogpJf9&p-6c%;_1+tS@Uo8@y zgr?M;g9?!#J90Tn(S%Fg@m!h@Id&g-;p>qYr!JRsDJ?lStG~Pf!Vi04W^O*T+op62 zE9+=qpDNPV0+=J5F#5_c15YMlE3NbA&!aoVrrpNQZo{PSb1rXoM%zSgG)-|KW{>%J zes#ZJ=15YkXXFW&>-P3}v9Sh7pv&DW3tqUvlOMs8_=gxCS%{Uj)*a=V0-?D@YI>x- z>BxTWv^J7kTqu+ISBBPK>MxueYBKhjOFEA(2nT2Cmr(`=TFTsoT{FNG71!`prDXpt6(uQn$N2kA?7#pID0bf zyuNU$+pn>WVLOZ7K4j21DMJn3vcH(1V0RlgfA`xO)N z)&=Y2!*I1D@50X6k#<$jS!`LAjk&qGI4Z+sP(;=8=WbZO=KYVgB{xz}3rhcEp*n30bIt zDT{HXq#TNIehMymvS$foL0M_(ZF=@)GK5#0G%qN8rNszUYC zOE7Sg>|sQ!H4?d+T^4HHdX0q*BR zJ^ABblv(G%dviPO8Z}@rc_mi5yU;;3bk4`PivKq*7iY?OE`tB0Cb&JeO>f%)#b+;Dk?ePlz#j$BfSAFmRzgo zXsQq?c-PNQUPk6ZwXr_)wrxf(nI|UzRH~?X4a$$mp+74tD>H6u2hL6y0M7AY07BxI zI^G0|2F$>*Zg+uW$23{}+S@K6dy^T^%?aIa=O;DiY#VHd{WUSMMM^$eWCHZudhcHF zLqP`>R6O@;$xvs!k8c~sz3*}z=AO%%LSgS`YJT{TAvZLXHsXq!LMKA&g9w3{w=OpB8=m;*- z$^67$6-&*=#)gv~Zq2BvuCA`H?<%-@(a7i-!`>rDj)1!@h!(l_l;PXNdv;B)ML@Re z1Dej4Q^Ax--GS_JilecdNKuo6MPXr@E6YnE#**kxuUFI|*n{s9UDgQoC(kpM2C>Ek z%|(>FaheXd4F2QsTwlQW=ngt{I<%q9mXno56TI~CG(Cs!Z4a6!ox3#`s`KQj^Rv2S z*Qa<$3wtDKjol3S+QMDa)5`s`{Lqav-k(}6XGXMpyq8_?`g`m z&#!475nSU|Tkom|M7DW6bMfd3!1YEFsO03X6{6!KJNMk&klyfCO+zZ#V{J&`-GPQ{ zq&LX6Ed#*cJEgw53_hk*K6|#GbOv>%*5uj19bt7I5?0l!lQC~v$N0LmW-GuNKfV7s zHkUJsul`K|2*VCdn*-j>68oacRge67DLY09z4-otcU!s5X^>n^A=A_L4o@-+7$`lD 
z8~wjL=csed<=0-%%lkz4`DLGy_V0C3MA#NoXn$H=J~Mf0)}njNf@X0|L(PYt{^j)Y z${{O;ZQBG<)q!E=XyxYtSNx^>K0m(_GR)ZApLG-Gg|0+Sk8R5q60hdxe9N#SL#Wmo z8m)I357k;5^YHNOkkXPEt88krLmyAs3PkkrqcVv+b1G<{E%4JJ@V%PUN9})n#6KX$ z5uGde+Iy2R8)oDO40;}@^s0a@MYY;U3f|4S9n7@+fx*nCA)~G|v(MR2N}JDRIyyv~ z(DKA;aQ3fWsY5)H5@~C zO0Wmtyk11CGEazCaL$v>p$t!xlg~eBu}3xga#Z!=F=dwpN7N^vhzg`-Nq4)^zN9N9W=;`zb-LPGSMjQ~mq%AV(YBs`A&>tu2;L4-4u) zjE!&e9j7?5l+K>D7Gy>OdDb>;TyH4twjs?{<(T#R#Juq+otU9coMK(nv5-ojq@}sR z*wz47pU|%IKymp9menOPaRfbS#!Xi8FYb)5&)@C0S>VyuK~-@bj&;Ii6kEH$@5J8|4pd-bf3+@r|INU(iP zL;W0kga)ktdLd-?@#W;vH%yKmzR4*CjWgm)?jjD zNZOsp=k%+tNOvjPQJOjCT5D=7Yr~JCg&M-&japCw09$kltlFWLh?!UnujjPs-b_Pt zlMzYE9maE62G#-bFpHdsqDF4?B;FhI+(AUZM`XMMTW^I{|wR1t{2wm-% zMp4lC_Plb;pR3(TtG&c^bn~ZHruL=B-+t=7?QA$u*<2onhZkA3=Rk5a zGBTnjO}ly^D+6iowG$So79TF4Ajp)JKfb-cQz=%0m5Ip({5MDf~9>?%_E>F7F-UAQkjeP)FK_WjaJgD=j>lJ5ov;UV8 zj?u9?z(?>&3zf9IBVP)~Yj`VycP>Fu`Hg-?yU6y5@iz+@Ih%36(=TtP9TpMMq6UrT zuo^c!O!@v56)ShP>;v#TX47?q{2TdA;b2qIwZ7^FfgCV>Qg`0Gc{7>WF7Es@&Z-yz zCN^FwKq5T>+GcnE`3m4DIw~qEIC%efqU|2!HY6GvlH+)n2i5~k@A1PASf)fdIEHXH zjrDmsS*D|pMSZZ@Id}H5hPswkcu$<*3*fsBRls6wYdy7c^=YA(8 zChF{nk@V81-m+td$${N@eodRhcnu1U1b26%b5*J5o`&|0CTD192wHZ{hY!flzWP~( z{iaTU1(;R{P=Y%BQno+2h%%{gjr>YLouY++z>B)c4H= zn7VZDbDXA8q`bS@85}ZHhmZ*Y@d+@5JCXz_OJHzpy>90|f3%clRZV%*v-O_^P(1og zuD>%NpLCFxG81qaoJJ$%LvfiN5DIk)XAyI(&EXUmUqVPtuBegTV5O`CnbEbCq7C(B zb4^D3J1KXRi>s#he%x2-!!~<@e?0>2g~k11#6Q~kqOg>lO$2ZA`}gn71glAm%@5cm zty8C~xE6lci@InwpuM!#cI*GJWuihMi;vmgqzuTl!;Dt}_UaUhC-n z_~i6JonBceWb}6d?7U7WGr(+hkH(d?aR$BKsVgDb zhl&Zx?O0lMZS4ujM;<{LCq{ikbPi}{IjRfqdv86Fdct|QIRzMVK-pq*L|jnGL~465 z&=QDbbdbS~W`s6@ehcg8IsKy%WHwrRp#NB4iZt83I<9V}M;@$A;ZYHL--+*YPOYWR|h^alJ0&SpCkQ+uMqZr_Fu4cD?|sEpgBN_Sr6>rpDc%# zbsk5#voiX$qc88ZlH`(+-@Vxe{$;^Z^-iXJqHPhZvAT< zo0b36A%{NoTn;%oxi5LpAM4EplkomJ-3P7eb4}ippHiyb z(9w#Ei$l*4@tC(iUI!k#4ipAxy~)-s!(CcI<6Pm|c z2$~bb%N&})mf@LdulE{eL!+Zw(fm%k9G=8$K=;zIu&_Y1j23xsOKz`z(Zw7%f1E7) zgk{@BQTN~3D%Nu>Qjou)+j3l?cXoBZVsBPumBrjG 
zTjyie*WV=O8MXgLcUb*7s;jGuQn7}($HS_*DTQraXcGHEw0}HQ-yh372_eBu`{v6G zvH*D&!pV)rU&*Mk+a(tFfCz@10OgkgDH0;d+O=ze>6gDp*&IA{sOGPxXP_k^KA1Hp zUx2?5s2yn8i;|M0qN0LsdFzQUL|1wRr-49(;Bxo={V0Cpd!o-| zdCXmqFCf`CqQ_`U(`@+iMNC+Dw%33fS0 zX%+YHE;23_-RHAUuiV`QnHxq$JBK|XPp@%6V8ybYmXb<6$hkAMHn3WiX}omm!Rz?z zTU}^%ao_b=@6;hNGqSUfS5l>Cl|<$rQY*}?wd&Jo2pRGd&n~%2J(_n;!u-?z$BIs~ zBic#t`XmgrgfwMj?v~ztXC_!8x+5<2+U=jADp+GK-h2V{m8J#a{<$$1cK26iZr-k^dE)#B3cs*1 zfN8^dwvAO?`lb4pB0nk+6chkIB&O$}qnYNDa=27|!DyPGj#Z^Te^J*ebZ~)n?K+F7|FBrHYm5 z6Kn=bdkGZO*)>HV={S2wl%MRlXNva<5&)4jaByl_H6@1aT#1~moe zEDVwc1_t;|+$QLf%A>nB-NtXdU-eK*%0P5pK(V{BsgT&X@ccqN(T%wTZW5uMVVtmfy z8r7PY+}=E_$*wvN#Nb*k1)o}0rpLOj$8mCU5`hOKvUbfHT~=DEI2AwX_*aIY)1S(1 zNIf+6Usq%ASscp0bVbwGw#FjdwXub;w#PAJ-w+G-U$MCVH*eR!Ci{UQ3Kf~5(uf~k z(u^V1!a`r_G6h}` z7S%j2&Z9>sAO!JqeT8Ab!h!-B7XAZXf>b3ne+}Ydtb}k(_}~EG za4|X^yEok?cyWII>qrQcNubxfygX#-R9c{=C{(zpD9f^fNjbtmP9>z4ze`Oz9!=&l zgf&9D(a+KhGc_^UNV?_i-QojDBPTUG`=e}z`-K=#9n}GA3vWj{3|pa`UbyfmEuM-> zeNmA`|E?OEE^P<^um-K-qmK00t0g(?tr2o3R}8t%#W$5Db<~@?b4Fq=t^%B z6*3`CBDvn;9aAOZ4(FW71O{(19zRu+nAzFCkpSN*{2qVWq{7Y>VoIx&u!`_o|w( z-uu+5zZ>@H0<*?%$Wx3vk{0^!zRJ$#bFt1Y)ft`w`ZNhkS|b!078aIv`r|Xu&eXu> z7gX1*rwTr|%$9&)$%uT!uw&=WDEUNiY$>#>!d8D3Y)k0H052h+CR?his6cV@zIDr9 z^l?Z>>K!)X%2N~YNkvEi-`?Dpc!?YHvLmb~$=}%IwCX6%KMYy6O zQ3kb?=G*>ytd-npmndg7edAw5)u-G4j59MbTN|B($$6dPgzaf zeSLl7<2Gee!!ibpZ zAf3Jqq7=RWE+HWYj3)qg?UVwAYzhk$94DQ5=1pXVR!btaz7#K>+=P~-7Xk>}5+EUb z{9z((d4Ak3xl2&{z+ND{$qeWUkWTr#8KwR?Z_qpkgHkI0iNeeir{Sw8u^&<) zy#ajP!K-hT0IGB-`_k*D@$oG_FiLEff*&5vYf#OgUQ;ZIT#rkSibBFPO%OhO_`{`| zwNzBgS*x+TMz>+`^LXawzfKmXzK!GGj*cMugiB5dDvvlANTgFyQBeT_Rq8D)Ea}#J z1j@ivgT~~_Z(YqEz#bd8|L$=GY(*FZt?+xDaXoK$8cZWsOEN7%1-!brwB*A8OgR~5j0A70hR$A1t^ae+UprH^ z7jnUrY5%pBs`?7wE{KGyHAXQhEn9(3j#^b!RT7C3vr^z}7oWdpyYJ_>P3rIU>o<}R zF3sF1Ha-j>+A-gdqhjgdh!#{wV=v47O)p()mzh!uFl__?1Vch$VPV>pzmz%bhaR2~ z7l$gWOW>nC7AGMNlKlDezrlOKRzrvon3Cg2(Lo#P^L9wWrGxLt5&7jcJXx*tEI?Ir z;yN=qx~wm`(<>!UrQSuqgd3xz)PMwWWAbCdY1|D0StT(CF|di)8os@^9pia~nld0G 
z=>tFBnKNf(W$%>`$%T)P@9<%mNwrfl?tLV7Q=Crf4*JBbhgC-NunO!JvVdP}Z?2Ux zvxsdE>g&M1!(;|DzaS+mrs~B8w9ovzyHm%RDuL+VfGU0a)}Kv$9#a`WjF31NFsx?Y zY6Vvs$YbUc&Ogxzf^Zpti-TfcGG4yG1COCgGkNBpP1tgb*cY`QK0Ne(g|R1EQa!{V zG+VhQ;;z#~|7~mg6DAIgOx@zEUxecuKSQEs5-`DFL5=XgAu%L>FevTH;WnsEpFVv$ zC?5_ut(@R|@A;IVeAy2QgJ4xn4cFnrKPM(0U7=o^T>Rq`iTv~U?2Fh~cJFGc7S)+D z-|V6yq6Xpu&^_YLP}y3hmeUYX20|3A^#3R@@6v7-b~{SsW6V-yA}Wn>Cn-4UYtZC2LO`e4HuN12t zS~jPp%=}}au4QGV#oLMv5%@;~V-`dW1_Fbj3cJWhZ`Tcr2Ry8SsK z9X|b*rWa=67tRykAV7A3-v~OS1gE084{v3Ygg5h7a zqb2s#TczS$(?m!|xVdMh^GrvWQl33~Q&Uqj7Vw&G;{WlsrlvqT>-L-hR3Y#MAO;1} z_U0+3W#gE_1>rVt!{J?pE{ae!btXj z#RY-{k<#CwG!aM?mSS`>C2)&ldZ@kVT0G;oKOYIlM-cI32Ai^(Rx~+e27t+<%>H2O zT>JcwIgWiDg0s5nP^^fA{OTr%Z$F_^3qyJ%)Ka*hq{=%n&7KI;@$IrSM_$=Ra-fj4(m@9Z;6F90{;2@l2W`+w)6vscKnuW}wF9A9M4pwF?g$2= zfztgSJuIhHG|W`nw;vD?C7ppL03`%U_F-O^4EaA@bJD#AhQx#fVxkl?9lTf)SgRJM zhXgSF3abN5195>bAZq>W#h;Rn=Zy>)Q12t5sfw{JL%`6|Er*9)-T`zAq%xR!M_<=*nk`bNfF zN<4B37S$6<8s^+f(VlPT(l{~sVITN03~o)MkLrZoZ^Js>%vCG0t!;LR^7rfLrjdXg z+jNwO7JlIU&5PdrENcGB$cri}-~Qxq;QT-Nys&S0IIVmpOMd$yvc=KW7ut3VT=(N9 zw`LhYD_I0Th$Y2MXavYeJCoP?!Qi>4rzb@U^c3j~bQN>+lun`z?(KcjHq6B?sjSc( zywq;Q&*U%tSkZLY=Lh2cE)T0M4fp%US81N?1;x<|IbbKxBIZkglfXZGN7g#d$!P(7 z2wKe;zQ-eoJAlQtJTC5}UiiQ5wsw{W@jaj;RnMse4kSr5NE& zw6tmR8Lv+hK>kK#WxW9T9x6BNB@6TO!j`Qg@O=|bDbCW(O`8GPP#NG}p;UqvgoQdT zZVxWg=|4U$HOy3rJ&Aoz83;4b$2Q}yuZB)Wj0+)jvGd?#c7sWHz4t2_?N&f;c%(5^ z&on12Bs2qM0m?}d0Hx7CWR8LiXy-vuwy8x7zZ{n`Hs7Y zu!Ad^b!$70hkx{a`-VYHZt|}G3`m8aF}C>A7pO-wu;E>e=h%fJ-+5)NR%Z;1SLb`3QbvI8m>>^J6-G-DqGR*D4BAUhhq_d9yyOFqkMQy!ypN zkojW1g%I9_!Jq0jBI(_5BbdZtZshbiV$uWcR95Nay(2Co&)-9(5GTEPdiH+k5$&gW zC(xYB%hw{#P-S}__Fx-^g@y`Ws{Gqmo=)hj#p`#On|niB_-3Ah0y*=5pVHU&`f&V7 zg>zw>N#us--2n2tjJGAnaF+Z2#7)JRL%V6Lx-3fG5rbN|KRa`vBth0ipR@UH`u{+1 zADW2-PG!QWNX?5ug{@nkF2_nh{1OyYqu#>AWZo*WfB$|lu`i@Yk43KSMMr(`;5(8k z`WPyz@RZ+dQs~I%Y-cefjrA5e^lXJ7lR+Oui>}gnwx%H-eB#8L<8J@2RDf)g_YfKg zhz`vMkWrYNIIF@1P2F)=FQ9@D@Sbbs; z4#o?~)IF&bmi{`@Y9Wk1)pR%> zT@mK$kd-?Dq);CaxX(~fOYuMp2w@f|^9Wuf!jX}GkeKn8D~G_Uje)y_`|=z79Nj<` 
zn1gV`abWg+`|ceMLp)!EcxXt_n$<2{B=m6uX?kLU>FDJ*P+UF1GofKdZ4p0oXwd&y z*AE;9+~$8Ekk$**HGQL_;sj;~2{!%lbO2KDEAI{18$Ay{qr8@; zTYby}a;@r`+f&)}RE4*{eY+gr_yd`Dl@K2uwk%UHW`wKq8QpGS%!Dli7f4at*o?!V zqkcWIg_zk$GBwuRqU=HBQ7F8d)^9#}Co;EbO*TNZElrue!KK||0kwp33DFAft00%S z(p&ae1DI#53#+zZpCIG z%4lHVUNXc+Ry?7k7JSg!$Lza_zrVbTzZ(l7%W>?xAsey zysp!@M3)~YRu3tQk5qt(;k>gNQOa-e|CqtmU3ZQ6GgWH1Z`$e~sEVYqYw;Wk6^QQL z&G1;j)TO#0@h8Yq?=e=wG$VvcgNIgt_UlNB0=fV!{_*1nG@pd6y{nKaI^i3c@Az>S z+&Q4(dw1@@bk|i~ zy5W&0;hPOW3&Oh#E=`2gZ%nJf(hl!>UAh8RhnP+|>I;}f#Vj+{Rczmx)prYg4rNGf$ufxDw2Egfcw+uEpa1)(0FAI4 z=TTr2F#zDkgB1K;8JZy^F=YX^C#2*59bPj3VV&sO-EU_jVZgk}n?=aTY!Jr>q&UBT zXoG8sm;$83tSjuDupa4W8=eB?kX~0+rJZf~3PE@I$`uTlJ_-(=t`%s2<i!ftti3?wY5vCFBb36p}q+`Jw47{C6e|F=e31Fw-bQ{Txf$+}w=DD9@%_ zpSm$gDHhlRB$*QsU)sO=MeFJl9;%1opd%1Up;b^sgbNY{41mWE=B*54bugZ+nK`v- z^X3PBex>$9f$mX%u;rBa|AZ}_v6K{h(c%31JFCh_s$>RDFTy9J4HykCW8o=~3P~d@ zV<7n#V6+0i2vWy~0UH+DNuap}2rN)fm})BV0C^S^6l7;-hq{*Dynfv}?TC~rqwAGV zsd!ZT9EX^9^07Sdpei!96{&ecA7@iX0O zzDrWFvef`##50Y0d+|Jk9wa&A>TM))t)lVM7q$WHufG#dxOwI= z(Z=r5{|E*PblB*VR=j4%PV7m|Sy@?e&@$QekX!d8-v!;1RqKS_NpuR9QqMF@#9qKLGo{{Y*cEu0AGK;^U279>3G{ zwj+NBdes*e6tGFSI|H{W=qCVy=I75KD@_k3iNYYVUuKnp-ozrp2S2vBxZW6HNrWc= zhK+t1hfTj$aUegF2l*$6KlR=B@88ECiw$4h%E`$g9?-MRXGs#23qRZ?@IutbN>-pg zOWi?Zn3$M&8{D1KL0FWJ<2eUJHZa2%!UE`o2cC3VRNC?qXL~rvsM4*Hc33tfFy(!ioI88=tVO;~41emWAG!S-UDMk@Pn|%2Si+SWk=oeUl-%TmFPkVpP-@ zH2m@}6o@OhaXg%#0ZYCQhF%nfgI^C4zkMn17o%o^^K`npg3Q#0L>?CQ3r?lx63*36 zwKK6&BdJ-(6(ZV@Z?n#>V$B<(m+klHg6h)yazJJh=tsyRI4#XGBW)Fag+Fe*$p@nZ zivw{CDoXfW(?8Az!EMs)q+n=gp75R!6c(B`wA7fD!9%WkAwdh;bbZa6S*?b({hn)j zVc<3e>oDBPtw#(dKLEFjz=5yS5w$OoYn9D2kN!CD|a|6{0I0vLmr!xc&) zv}3?J457pxw|mFpvwDj1H9z)|kvmMFFrXx`c}!o>i-#V9DN=}}Pth?AW)yaHKwyx$ zze)s!$26X!M!h@^9(YMGnGw%N(MSU+cuaNMdg3DacoB3+ka%j5-U~I(kbnRsZ214_ zh7hxx(9ARSAYCvqF@aW2cTgujNtG)g%D|(QZHV2E+Siqp*i6Vhu0W%(;|cQcSi|>(MoUpap~WBp#W9g<7x7s`W|gyN zeXenx4}8AB=eSe3NWMWMl;=s~WNoR{&{m6Q=qoV(k)a_bDymZw8cF%j_8&8JRJomN zdU*UKq;|2eSl#9)j!xxJ_v57V@1K|KnpYPs`HJ)gsk4n1`Nqb^p_yy=A^=s1JwDm3 
z2am#Oj$PkhyHT03U`Xt@-Fiw)ety2cg3#S@iM3BO(%>qkl<{l^jWe@!wV)ggF<#H@ zx8JDRwtc%ck@|RfcS@P%cidLttM&aw^-;tuU&qC2gK}82MgGv*OIS$&dI?zZF?I0O zGD2G5z8_%(DJx4zQ;uHqKjqLzhe9r-sL`3JFonyUc4_*izv~l8HLXRSb8WMd^oB}+ zE|1^6zEafi$I#Q$3k0ujfMi!Lv0NfS-O7#;Eb}~p1o2+&NCZ1M|ClWK`!iu&ZYCd3{oDmdD8K{66BLh zYD-%x(ImB=VqHC|sD{U$ML0XvH~bsGjX~AKU#OUUDFF-`lQ5hz{cr$waB?cl%}qNi zv-+_TL;F2&=bGl%K&PJ`>x6L9zE9e(C@XdKvnHm~9PtFVws~{t%IJ?DzcH%kFy7Vq z_U&0}k_?{o)P&XtyLt*J6$P-HUUU>{ZBao%xtN7Odzq8NZ(z_3KQ<_=^xI{`*TtPg zM&Bl7)sZ)Zhd%tAoyDa{+QQYbb^~q-VHm=5XNad(=@XBMf#b0r5`(h zr4mL6KjIOD6EM`{DSXvsV4}f?fkh#dH~0-NU;YLP2M_bq4qEMOln=%SFl;2jWCtmo zXzI~SfoAP2wtL^!MwF?*{X?P0@rcKvd)uV$Amn19sl%+@2Pi_x)zy=~(8zS5YKV!6@$xpaeN)xftZ*W=-hayN$7qKF|mu(y(1&(754!88TN{a zEugfncOCHhDhKop52<-9t>G!;AR=v4*VOnBHV(Q3!*KfXzv8YP=HUr#psZ02Lt&GS zKU(V7cvCq>LyedR2dZ~*DF$A{lSpA9HXjjAJJ3C{jAtf5;swyeP5Ox)LzquPLT1o# zeb_M1DXxz6NVxekPt1Q!Yy0VmvEf?#myFqWZqIn~@bZQQk%H4Nm6`wbmG-o#;g;6c z))t;C_IrYDcrVD{v5z?Qokz5kH8f`N*g@tObrvBZAv7f5d`bApk#$LD@Ejjvd*P05 z*(bdkfS}8#H93wP*(N0-B7$A>=44jyQmPuKox$$AsrR_`PVUnfRXQ_s2HdkoT7E$R z5O!$hx*e^`U3o=2Z`*%>4qTA<4lHyh8e1ih>8HYyZk zr7^hrP!CA-)#l1XV#ACDVN`;CcH;)sg#p`G_s<{e>J*mwuVv}KUQka_b7#+REBtc? 
z0efc7z-ELqc>dh{58@WI-!$|PIDT?)Q?pWjr1AgN+?mJKytjLQnUiTDQzVhGq9_?s zS%i?1GL%X(q|C}xl)V?*lp!i}8KOahA<@V-M;erf$`mRMQiiCW_cwdr=RW6ko^xK$ zaGvM%$Gz`+-wkX1*7ti|pX+mduFrMZ7w+5u-U!>IYb|33u5okAa@O^mHYUK_Al+Ga zkY7q(-k6ai=OpXXo5{$?0DP~Ex<94i=xqD*AjVm*-o0B}l-c+3@)=)VWuMu3;L zwu}w!sI08qy}QW0T>jkW&*{d~h!V_b(s*rk^|u@eVjj|6Uv*j{1=-owNn1C?{E8w| zeJ|)QEOsHHIU54l`)sxRI)&NF3Z&+l1#jT~DrG*78=QA_I3kRN7mjBOlp|UJ!GkHSU zFjeSkh@j9l?xkVBo%&RI!&5>2Zd@FIW z7PFG{X5&zSB)S}mOCN!jnHn(fjhHixWBTpTt^F~w!t3g$AO@})!6yGhm6c=cW z@LbqVs=|O-dO)SY@C%X4Z*dLZv4~%f9#a+0kzfT8*3bOJM})CXP6zz`zaTwf{!EA- zi1U8szlhe5wB2pxXJLw_HB3=#lK~2iKacoFMUb}B|B2I2Q)OoxwdBqv=+a9o;{f# zByaC~2A)G0d2zh8zV=IppC;9 zifvBfu+gJEZv57alRR|dN%0es$9&w1YFu6dbr#mJF*dcZM}n3bHS>p6>dQoG%zS-5F!W4|>NrzR;6{fisZoA+&6v#i7;Jswi))Rj2Y$3@LoR@cyY6M3O&@km!!*M=hDWnWXvf7A$F zGq-Hqb-m&BD=x%~3Za)(H%K#E6cY5EiO#J}_&MP~9ufspM|4SR{^_u=#gE;MMll~L$Jv;%`; zS}W@g7&R&mlkCH}X>{d|q-LNQXuNl$D$x#3)91s6AY1vaL8y0orA%_4D6 z`S4sA1%mopG*%P;9H6wN;hiak)y$c&RcaG4eAuu{@48o4RV5T`6yG3mKQm!V1&0K+ ztX{zY{j)9jciuHo6ShPr4ID6lKiP#MPjAh;sdi4&ragi?LQ76D2+Kz}%!LtG{)M<2 zXMa$Z(HR_lM|)ic0fTAevzw}JJNaep zi&VqdgdwP}HnGcVvu2nJE2g+FWGG-MkmJLUkLBfov~vRj&Sf~at{OFRWVfeI41Uj< zb9hN$vw36PS5AmEz2~)#I{6D6%3F?j`i0f~ z{slf|sEJODRA^OWPy${LUi-Ry!U&0CXEqP`WB?4mTkits_O z`j`$|jeOBxb-3G_H3#;#$q$c>Jn`%Fk8aaP>JN^vvReB5idnJ_*p)8>gBLkDIsX<; znfj_XNpzn+Mx6JoDf}SWkkel$A+LbF;#O(;H-4SwPfQCj#89%K5_xIhnb7z~;;-Jm1 zmH*G){)7TO%YV zUiHkbpH)!%o5R;T)JKj+3look$3fCRb8|OBB@jta^ESA~LxGJfZ>c5}uN0N^{!zhO zJ^xjaWNGh_y2Ru%jDwW4SRbx?oWe=VP@*1oDid~hJz0Rz=ikQUAv@YjG5jBi(^dAV8%mc) zwjF_2#7x(q*s`61O7`A(c1yEANm&pI$40LcQ&ye=g((N_6=#0qiILNy_ubz{RQS`YGzkM~%;`r>9M{^Rs%Du4s(rp-5&oN^PPBqTujPWxygHeD`2aw3ik z>k>6r^NGTLS8Ac=hNNJQv%9YIK2N2Lzd9{5ZjNX;kM`K6Q$eB@@ry&;Q-p}PLx

xj`_2O^iW*C@R6!U1gB!*Cxv#D9AAIirr*ZMW_L(hoDr#H|e))HxM4LkezR_PMV)iLnRiQl^r*9J z#OTo_`e4KRc9lkD!Ng2N(B}&qNm^gs^y5nK6a-++$8F4`HNnQ9ilIJ6(v&|MX(USj z4jMO0Fl%&;vH6_6E#5M4%rcn&2p#{yLG9nf=yh!VYTF()mEYQTsNbcQSoZ9xmFibz zuU>9$Zb~xxDMk97J9k!8qhe?s6>b@NJ|ZG@R(J3CL6n|aT98lE!?UU2Bo-P#OPdJb z=*(ctIdPqhS1I7y{Hh^pv_9C-GVW#{u)kIGcIr^{)UK{2F6OJjMQh08;n~Dd?K4fP zXS@ltnh21G-JhN!|_DN;&q9s=`)jlNN0myMV9{P=!)FL4d&m-dBCj z6>ag@ql4~vp&;x1@6kx^5FP%J4LLy5)bd{HGv@db$pTO-00+njgs#F!C)1?S)%d?_ zDJ^rwW)kPCylMu#^_0zG4t#GEtq{y)Z^yq7I2bXo|8lYFg0*?1De zNZ0Y-f4_;kY2p6LK|8i@m&y`oN?FIVkfjnXuE0G4(Et_K^8~*}#wuU}TQ|2VwA@D6 z?%KVfO)rd{k>Z`7eLgKX1o~Dn5&pi~6=0#UY$S_{3MW_|vrN#%^4WA5Cy8fv4@W?5g zu;DOBx7Ccj535}g^elD<8}0?6y7E_|FJ3GL?Vz3x{?f7kv=JjmLe3P~7}AF#Yi2Dt zs^@S~v_ehI&12mjh*(A~iucav%F4I?FlFM2(i8GkKhAcn*abE=N4^yp&rX}T zXjq%%>YsC(RQCXc27jO{K%dwRoixqFVr5k9Lac<-q1~=JlYWqJ5YcHE>(TN#$U2M+ zXVH;g9Jpy_(p8z)eQqXKo6lNIxCz(MiVB5x?aH5B5_&5y*lp-G z8ul>(VhKjQ5Nj~TNG1caOFkx}kpFDfxm5nX2r)SY|w zL|!flyBVcj)M9M#gFaa0Cqbw&7L0-bDWB+6ZEh3}y(i2BIrLfZJJfl_>tF*neNNSf z7ev;O)})<*6j2^h=h36%e}qdz(niP>28g|7ddjK1X*VsP@htXCF_p6=W;uTHl*CNJ zo=33&QHl`r^v|*O*|7cNg_){i2sm_M|t~*8r(HbFj|equcM4t zw+Oldyi7}MkB?78?HU7&SWso*y{HSJPlEfxGmta_DNLFUoxM2mj{x8RQcV*C^> zgrb_5;^7_U8nu^5>pi-iEIXfZ>(+REeSH`i7!o*do&NnND(pXc)Pn%R$)0s&*vH5> zncNX!f5DLuhIl|`u=Ce3W2Vx5L^qzuwO}M$?G|2El;XozzrDwi!MxRPo5SZ#QlTb+ zx&Fy9AV|x`jbpSCxsTKQ`C)^van40)5fn6!b!k-c1;jA&vU{R1rbB@99NwRCcI1G< z65d|b%Xr+~_A=vvl!uJqcIwhaAFlXXAxt;V4nQpyw3NlnWH=JV51-0bDYb;zq)F-Z zb8H+i^yE`*m`6Xvqh$$IF1!em<5p@f^743GVLztzG2c*w8tD1cr}xAt{C5g|bi9+y zPTWJO4lNv7&_ODVrlkn{CD_ss=Y_VYtX5}B-K|zb5~77ULio)44t_LcVO~zx4~sTU zS^oH}4-!x5pJ{1(p)n8bWD3s5#|-(Rl-i-Uj?M+>1=)5!0~*+Uc)AQw8ai0**;7+C zYA~!QOxYah1le|!zp2MZ>tpC|+{pT_t=cV~Y8}loMK$IT5h6rG!|_1ZabYQL5JDy8 zL~682{`zU`%BUO3neziTeK|~0V7l?G-CmW^UqqV#)hEHOYHzvF??89)(R<*Mj`%`4 zA4Z&kC|?h%l8zlZP;6l?)cRT>i|E*QH?+d0QJXGXq!#;#*mOJa#<|9Omu)BDRTXDX zsnbsCLQojlCgsJ87iI3>|A>!|?-4GSQiRGBYCA!Q;O+a?ixgz*tDd)+p>P_Mv;bV| 
zGzXdK_V@aXj<5#OUJq}aypN5kk@zru_Us{fQNaOmdV2j_wd-IT$sNm3D_-?zuR2fq z&y-@2wP?A}8e`cIYDGM3tEiY6IQ^^*{FrEW)oYoGeM$HVFPry0e%i;-_%h>QOJO69 z9Qu2vNY~U=T_x=8ArZq zAErx1M&~znAE#35U|)E<3OPm;O5{VdSZOXWgQ&xhvz}fIf$Lq`Yx|C=#Dtb=aFr1S z1H3%$rKI@I2*e90_?*S!g-xz+s`&cV+|(40PG8Tp+j=N0Tegfgo@QXXK!5*5=*UY; z?G8l5U|XD0TtB$WQ0+4h!j3PzOpk}>T#HN?m#BQ)tD8)jsl2(|X?(=}gXgI1WZQ`c z9Mu@hSu}dg7LUkPoF>kiW&;7Ic@%&kGBs}UG0g1P}t@;JjIHQE?>D485g(NiLL^MW=od}-3w?+cyWo< z40PqgMjCc*^nf>#l8M+-41N-CffEcqX|LS4_b`WHF)xE6BXHQ_J!MYW>VM?EUGoQv z8eY%~vwOqw9Pd}&=<5y7g-yI0@$!(ApcI)#udN_CInvJX$0|q^#DjVU4p}{f&Hw?J zkhA2)e?@*#Id^FG(624FsGHVwvBnSvyOU8PMtm4jR8}@v4p(%lEUb~*EpAQU`qM;C z^Z`j{Jb!-s#4qH;*Fe{ZiC5B5cG9#U`&Sd&8qXnSEL^rM5zdC=eDm|JBlk(vFv@5& zqJDG`niEh!e9wYmJ7EP-<_iy)pFcgX^&K*#oU6DZJIt@{eQj;1<1>iTm>2COAE2{Q zEPqF-EGEl7n=IroKo>XGkgVFc(MML zLBYti;t@z?1`IsG<5%uJ3!0#vND+027ZW^V48%IYA{OZm8=j09^W&o>FCU1CDhCFr z$v=pR!_HdtJcR9|APgbs3=Nau@2nTEX{Ow$v0BiGEnBwGnNd_w2yv^MYBn2!mGL*j zwS!E36HfW~q%si8P+#)mw4TTVDYeKc`t(>4SR*@#`Poy-7n7gFv=pNf@ZMTXAHUXw zEy%ry1&)fUYBJ0`i4DrRBXXC5YirKoQ?Oid;T)dXUgqjRoE~PK?S8Bph+kaf*XAEH zPT%FG2^}W5=vo*b$277pXJnU;kJzKpx;*#1YEv3*wYY(?8TT`q?k=4>^CoLV4zpV^ z)ri4Ertjt<`2v?zR#Y6)U1ABOX5y!nhK2@sjhhE=k62W=@E1(EwfWagvHv2X{6C?A{%*c}+V$u&-ybUC_d* zcIkow^D9Y`Xn06l8W$IbQu_z+4OFOS8q<0IfddeLuS-gB7MXjX5SW3~N-hP`eX6R` zUE>Zt>jOLQy-^EG|G|SF<>%Llz0E6E{<@07ZVm(&Jf(>w=tdtpEyYokcgf6<$+nvt zem_IfTiKiSZbB?&RVV8zv~|fXY!J*yOO{ zbCjtsDztt^J+O~dHhJnovN=n!jh~P|7=|0Lo$_6m&CUt05i+KlXu#K8_7D$%gKd-sk*#N-qO z6GEPd;)NdMejd<1^?|*51yRw(eZf%hZch(XG%J|I0mz7nJfvENWN(JEX(5n_69`Wt zP_fgL(Us*9d=51Q^>tr`hhcGXF4Gebdz2$Bnl-yWeTIS#B@KQ8ifVK1?BKO+^)j;4 zdB#L=!kO*dx#tUds;FoMoOBJnKZWS5LWRIf0T(f4Uq@X=&MZb^%I1h#zZMeh07bO( zfv8wos`XG&d7PO!$ekV0GZil2k@MY=sXXbQckV3W55+CyF#y!+uQzE&`P=pBF3+W6 zJ4yb!1N--@4Oz?9R5dxNmR7=Bf3meO5|VAlUtsdqIkvV?0iSrrabFt5&GfICk8<}B z@b^I-&(h6@1K3`MHTb=D!hJ|hj!9UViCF2&vL)$!#GP@Xg4GfedwM^8ivczh>)Ng? 
zOzBkn1sblU@YX-=$}-t?D_2^CXVXUMq_;U#Yy-$(PA`?QwnBfZ8$N#!YK)5?OF(Xp zY9w*@Zn$QPY4lUpw3mrxcbsy)ISXMs#IEl$M@-?73_J6#-# zk!VA*;uv8Ychiqzf;zj7_{G8({J+;8nAKKtbp@QFajh>C$PL*Y09m-*vLj|nel~C2 z$$j>vakwB~E7^ONTdssZp4G8Y?r0@k=v~yfhGrKdh-#%qZJ}TLg>vt{zI(`c{egR? zp3uo=)xC=os?47is9g;hYfqT)w-(@A1)^#9Rs;ohyT#`&cJrdrwjPuXY7|A5f7tt7 zJCzf%pfT1Z_Ly#X7Db=yw%kUNuRDZ*t$Gq-QRWbkcTkNLem8Lq5p5J3NI?y`0L)rb z;-GN&CN5q09aie1I;OE0RS@Ep7i4H_`&@Z=vBtoGBe^K$8{O_|nLWCFTP%g=L;gWY zE_&P`<-50TF$iLSHIJ9h%^~jSdr-M?^O4xD%}E3U8NC~*Dp_en=h}PYTXG0<-EQvg zwTKw323e=hIIi#GjGj28AEb55o`aHx2$}erSsF$H6|AgG#p}Ju8lRA;o*$Y zl9!ud3nkUJX`v=L-fXURi%5w^(Z@!gNu_$lF=}#y`g1UpTH%5sch+Sjk-`>oyvDBb z!r1Q*E!P}EpN!D1Tl@ApXv+EV2R`Hjy~ky8DHIhdQ*a8UTb>1~|=fz@0m|^Y=xaxEo;$9eveBG(3NP z>IqG&b4;is*_O&aq$xVdz`Ss<;eLME#36UIesE4L*LAdh&w!pTq-=yHMFjHey^moM}T%D*iCYMlNxzXGl~SxveD@13YyRxpvKCbT2cPc z$~@)yWf8A){P%Db8*dwkecny?og+v4X0c z)ho+cXfB+1V3pDWja0jKb@nCB^h3%inVru|DSX@(rjYnp@UAB(*F8#WDlY;3nA=)x zUann2--yh>CZdjkL$;$#|4|9C@QFE4)YTsFZrloTJ-x*u!ZEkBPk~4JRLp3HlqXW5 zG6^9NH&p@`h+>qL?wRCav_&n|-r6=3fks<} zd39k5BqX_s0+e}O5}euLp(eqWrOz&*$4m0>)L1fgI}L+qgS;D&M7aEFfA5P>X&%eO zo<`(@A8>@^Q$$kRt#fBFKm|*2IP6cdvM3l4=DNhxz!g%Uig9{iHY?H=gxOi?aVkLx zZsqy4L@DwO1FH{&%^C3)q>VX9%#z25g%s4dX)sW4ii$QsX%TL1%Chd?UxKN>Y&#@r zzWbf5BMDg)2ST~YMu^j=VfRjvn1Eljjy3E&dG#YgGh4E3S634a&j`Xbq3X2 z6XDR%owQz2Y7v0k9flJ9o?VfGqtnLFMv|e6m@x}VijIjX#j|{&>s5|Ay#g|)(D6P) z)8_6ygBW(bKI;7Wr92S?KImHC%r}x)G~OdLF>3OzIE|6peFFmCzkG>blVcIREcY!! 
zjbW^y#z#71`tKqZ#=}zOWAM8g&F?o}#BX=#JiiR-!uOT`+fUKExf7Sr zS3P3RrL)V|g1E~~eXID-?9?*qwc?&&%6a#w6g3(o`3NPEYK)=s)Rm1KInq}h1}gj6 zPMU0L4KEsT5|zg$Iy`Le(zM$y!}xM;{Mv|9r~b^`nx)druyEVf`d&f%MXYOGM56&v zuW7e+HKL7&nSHbL{_zd34o;V}$p*Hoo6=pFHg#$bL(i+>;o+xGzk!rJfn$&ygM1YR zC|UNv-N4$>&)4^q?J77`Uv(C&aLl`QeTuXckjAE?S0u=&wdZJmi2YEc0SHd|5hJ*y z$h(i82~%F~1#eRMMF{EfsEz~(q7xAiI8jR$}DOVZWq*P?cTXlv0b|r8K-VT-Nd_>T!A`;u*13g@%1DW>==VW z2(3nw!lQA(aUE|4A~5sA`li-;1$7MjZp3L9!bsCT;TxK6(gdC5GJ;%TK12<6>oXKy zF~WmtnWjuiDIKz^WO5qALASHBV<^@@t0aVWii%Hl=``0mRz1Paw~a>~rBGCxI9nUx!C8{PObUyxJMd_&WTYJbYR86k{2a; zz2$6B^?;4%Rq|E-=FKJeF4}r(wonp$q$7`OaB*#IpM3*?udMZh(_22IiS=vaA^c^L+%7Vu5{e{QQ+hsB-d z(54;I8K-c;y=3OdflP6|hg%oy4M^+_4DcoUZX(N|T2I<57zJ_C12?=oOr4linWEwT>7m)gE#`9;z2w7Rk7wZ5 z(&t^2m22p8_E%R=pt=XZ2>U@^%I93ys7=tgajG-wnD9Uyhr98;cJSLe;5!H|&C}-*0yVWaEGx%kR$l`(6RUh zCXfz~)~vP=Wf_cjTr)t& z2?|q)e<<4c$knqNwzRw!ejP$X_*d=s>60irppdFpSgcZYf8r$s=T}#~5edQQMVxy$ zdtzWA-fr*Ct{~J07&ecT@-c+t1M;c5I+4>L{1(g0#hg4Jc6?l1%Keo<8`Pz+A91DT zM~6p5N3W!mtTAfzf%#Q+J-uu(N%iT&>AjtmloGzbJIWDt21VEzZ0I=stm5t zi}_O`5yii6m$vQN1r(@rU8;Vup@SO~;1DcCMt@ayuHL2$H_~F+=o$(z!RAU&UB2vA zSD%xA{11~=Z#PMz7D<}@;b?3!Z$u9(-JPvo2X!0SX{4@q3&rA%W9LS#-_mIB_2#!s zRHt`rcgp+QKE0!Ncb$K`)za|(W8g8?&Wf; zZgbteVf5jw?ea`(Cc4kMedi12dsnYMy>&}2Wz9PS;ZP;^Uz$JJajA=f_tA=!0g}rr z@J*Rf@G&d2qZ;1aCZnk-r;pjcsbFuX_U*?ij9m3v{%9wipWE8Izt|omvIl!Se;PK^ zuN)hpH=mA-ndq~cTOQfd+R&@cfW~g<&rzXPVKCPj*RQ|i0?#yO^6uurfoy3)kg&pO zbipRs$(20akBb9WL6eQ}9Gqg5$m1R}WfYsdY<|SZG9qyPF%d#OXVcVBZchjDqsNtWA0|zW4qu zH|pz0ij+pHgqh5n^b)Cpys(MeDjT~_r*%hqZEO>h-Gf)4M1Cnsz;CjXldvo))--#wKP1M+Ht9~Z?UcFHwk$~wJAZyb*9pD% zGwK6ZMQ8BG_wO-R27FmH~LH&)7lzb5lJJFoquO}P1R<(uatL` zHfv!%r#{aY78S%szX$gqH*gp@jmiqnFnNQ)3E1RImxk3^uA_+RutR>wPN_E5ynAW* z*)PL){Vpz!6f`*zHCTw(yNL=X4=e~v{&8z4ZU5NVWOyTrrQqN~*A)+s4i1!dp!Q*@ z3K9cC6^)A{lM0MoM=~qivEu=7g~>?^2+gsZx~O!w@|slm8+QcrO=@b2)uTR}pXsQ| zCe54I>!vo<5&b1yySSUbFc>6xe|ghp%`%p<5t@Nbq{K9isv>gFI&1D({p!zVnYYY- 
z?0NHYU`euVSa>)lt3x{Pq7Tm334;wCx^$s3xF|n5Fwo3KEjaE&s&-sSNcEq4Qea7`!NKq!2t{vsx28f_87qcg4qL7vi z@?-AxvQO!T(}fy-DPngjMKCk|^G=VIO#*C$r#N=KgZL=a3cY*CISr+qgi)z1YjNtU z-{?aRR8rhA{!haK)lqICVPQLKCuMSTQ&Y9Zj7f}(JFHGZspHRYoNh}l$ zUgysID&6kWA1oNU_CzMF0#X>OKiTb&y#KMsff4IdYRdh&QPo9{|EMkZwHScyV;>jQ z7g);Z>*|g-G5NIKpt_qw6`nX4+~h|)HW}^aU}K}Ksv4zp(|=lz1Y^mut?fDHA+)dY zf752}`{~oGNlA6(UiBXK_B{jBgnY^Uox_Fsw}Jft?-k3IsVFN?R$$1Eihh!*sgAyW zz~ta%OVf@Hs7Inw)nf16OEEE|t7)`A_U>(G#?pvWD?$uCzgYxZh9)bd^bFDJ+jk+L z2MH0g!&pxBlVf2ue&R$@*YoGkM>F-$TLMJL(xJ*t1 z1rx8h;h03&Cmu^S5+LUuTJFsg&;4yDm{FJBHk@UG0CH?ZsWYnXY|vuHXP>xNu6*X(?0h&!Df{x5WD z?D4!@QN{Xq2ywA3%y06|n>RZt;?TP7`0+gW;40O7sp3c}C^Ygu4O&Ye^3WXW-~Sxb zr|Ii@6?O0&^%*cNT;%WG#ZQglYMMd9N)r+YM<81DiJ_rOxxb``oUe>jHcc5c0I-KAvCB?1G?ZZuomchjZ_Zf3=HWv96vIS@U z^i^v3H;@kD#4=YNk;(#U8O}euE91h`P6M-1m+;Xn%TKtLknofK=eIZRxX zs(fwk;D85oJ1A^ph*mRGU)8Nm;eJdtHy2Vlp3bBk-#t5a6tTVrLQjmSX=(y{k=rSC z?K<1Wre?3#&o|^?w@9VZCNZ=?hWF|V+tUc32TT3XJeqJm_j z;}Z`Se)zy*^IOYnYHFDDAG1|+?p2F4r9HWN6HH8s9LAUB-Mum6Y*m34q!JWs=9+Iz zxKK(@3OMDsWL?0Wz(tK4zHG9mJ+kj5Sw3I4Zb&mpMcoE+GLhh6YHCq`TRKO)T)}h) zj&8DZLU?WnC+PC!tnBP+mAxtUCyyuZ%>S0#>6C-lhR4=R{jLhh?>r;|1$ zmS!xOng2FtwoP~Q!_vxE@BgrO@^Bk{ysFY_2Fsn~2RG|1_O zD=ReGwLNihc+b7-$kRsgo#I|KZj{g>x@|)fTq2n$K+K5u6o+1GJMw!a`7OJ`NQ%8&KL8rA#?HN z{FMeQ-X6gg14#9s&z4Y*%lDy2az2yu=zb&8p>;`6Cwe*D)b zmEQtNh!BdOa=W9VcPlK=vBK*my&50C;`tSyv+YzykpzwU$_XAgxDBis=Gv8kOT?}U zYOp<+AT(85zzhJ~0S!eL{7Tw$h|WwlN0(W`-hR`1XnwDz(zqzGLot=BhAw&-yKAg%FB$PnO^VlJ3>V}3V7C~Q~ z;`hYF*j-#;YH8_<7W1YyZ9ymVH5C6;=6v#r+&=W6x3#Cbx#{K|4G385?EEy@tDZRj z^y%y^#_1$U#HE={Py(PPA;|^!`RQR1A-l&Zwm`gAB{XN&3}`pBPWvIY>jJXwk}*+G-1k z?rw--u7DWQ5B||k74{JL%S{Ng3ga*L+NYwL5bZPoay7M?-o=4vNQiY+>smFD`8{cKKG~@Hrbff?O~r|zHQJe8 z47T4dT#eI8T=?0u6GE+$)_;<{-x85>-fV!LSmdPoN(PLxZ+OHUn;1Y!C&| zQu#hsVgQO0UlUJ7B9}59c14%D3ZAZiR&a759WUlVMs5B%LhbE8GZn1VP*OYdACVP7 zyz`Im_?uMyYq8@yei+~>;GP#I37*Vszo1ejvLP@~XLRXO z-P-LyS#LT1T08Pg`ZQ&KqI8-T_ZNU~oSO8M zQ8nGCp+Fm$clOk(RJmg~YuAtd)F}-c?lX;I?c8zWH-U0_(`xh1C@3c?Y^ZqIDD`3U 
z^~#%MKlUYgr<9?{c6t*>weWep!P=;e4*Q=}{r%R_--zY!nVGBUv@33vEe<5AKzukP ztcO0$NJ}dxyg>HuesoML=bxVda-*gApuDetL7S0Mj5KUzZLLS3;tDYvb++wNp20`w zPT!r{TDH+$9W=!9%KnXZXRi5k^k-RH+d+f8y?+1zXV8-cySDuf=SG)VgZzq*aw2kfm`$(M8Lis#GrJh=@)+yU!uBHZF=cn>zL2gFGy{ko(~Uqy%MTbWIpCE zaG1~z0tr#&2hAfePJ3AkjN)?b+To+^v$BHB!iVbbN}B$i49|+Don{HE3l%ez?Oo#+ z+4k((^`7=aNOVlnjvqTVRRJEhK_XWBqoo?M09{*>A@dttZMx+(+Yeu%JPcitIY&3! zp|^GafCt(7MfyHM74+;JG)Kwb(=2CKE^%4L7FE{;(+leeEF2AC^ zrWHwe6T>17c#42&mZTsa5<^JTrxzWf5(lSVdGQ}m7Z+wvy`(kg;u%^`{rmrs)Hv*q zsr^o`IMzW1R$y(_5*L@Sx^5@pxL+3zBX(yX-*Nf!o=QsUQUxaFPyj#c!dvKm4qXq< zINXuiHvd9js0>n@<_I?g zL(NKeze(yu0^HmRJU^$x;9r49^D%#v8~N<9x}drK9sc4OEhJ|(`oa7LxRo%YCpJN1 zUxVS>K5}v*Bq5^~6!M#~`Cfg^07>Mgm~rk1=9G2jkEg)mGSN2?S}zv+AZ(XSg6^Uz z$-S_$Qrdg>InaP{oU2ZE+g7L5yA9RTGn+Eyw9_iu2#Q<5hd0?^50s92x+CuL8K>%o z>4!IaF)_g3CA;83U%4mlr1LL~z{!`62aRXMJ7n#SyKYzIP4n`qr#2pG#=d93z>ominUIfbLsr*9jvp!txsJYFNxyJgMoMZ-q!xkTSP$(4B3QeJ>8P7ot z$XZL*xSw=QvYt6pm}F&TRUs~2GW=5W(uMt>-j>XUtKc#c00?-t$aEoSpW1;;z_=gZ z@u4fI@#xze6-%6G`b!fGo7C>K(QivuYspLZlN^lU?o&hK3$L{``mP=JYj3Fx|H- zKfpU6bF|zl+dpZ1T3l!oP9riPGq2dN_toppuQmBNN}k2{koU;Urn7Emjrc@cx8{N? 
zrx3g0PYBE&+Ywbb&b{|f8lc2s!W@Q36CGfgXz9~^wwG7&-4M4;Ul3^W6|~3k5S$g^ zz$jj=V$`ed|55Q;@Q)hzS6~pO@PBbZ8&cn7>s{A=ntOBmh?i?>GHraMY~FAG1JXDo AQ~&?~ literal 0 HcmV?d00001 diff --git a/crates/soapberry-zip/assets/readme.notzip b/crates/soapberry-zip/assets/readme.notzip new file mode 100644 index 0000000000000000000000000000000000000000..79b1cb6de33c6ae86451acedbd50df4207a5710e GIT binary patch literal 1906 zcmZ{kdr(wW9LF#2BCy+<&KTnWqeoDYf?diGWm*(rdC0=9fSQEX-QBahSMT1l-p4LT zi4V-tcod2+1|0)s(nQGY57(e##v~0b6;fv~ZB%?PjxVCeSm)ejE3NiD`}^JBw$b3;E)Jtu!?a5JHtYiIai?^Pf=7T1>TI-|ELOGw`deaWh*IaG!;b^7}7HG3=50G zC`E2}g|Y7OUZYg1rh=3M3W}0HTt#`N%X7zme(&%OMM2tpHa{wQ<(j%t1(RPmn_j!3 zoJGF=_Mw=`e{{@P;j5nCG(F@@Lo(`*ym=@&XgIRGa&C8DXi`$>#4Odcx;gW5Q@7s> z%QQ9ib-h&*vdE{sHAYSC>qy+cH$^k>Va>Zy1DgvfcaIqxR;6C>hWb*{`|Hey&i5zI zdGvBk!zXvM2UHK1h8bZ_@>h#{stuDuPQ|-zo7`WhUNmi7(_y~7zj|PMSael)aL7?l zTXHF-={fIwIBk8+xX8zAeJSfqJvkLwB^w?WH!nr4+QZuV0lT`g)}VIwx<2attYz$s z*59{pyZ(+hJe|7csYpETiwSQlj%w5+P1D@+$`h3XdG#5P*W~wK>F7U{{#vo})WKQ% znN0^Ro3?hQuI^9m_rzsnNA3Fghql5C z=TCgIYfDeJdE;|eUsYsIE&Oh`*22luexY~GRPS-v4} z`^61)>dl7hM>P84-{Ne^n{U;B>D5<-FMcoc?b>C1<6-}@G*hS%SV`6YDAfjb*+fBItw|TJkGEX|SKtX9&$T2ahIMLHKFJ-`gT z8dWCFvLUPFf}BW;4B`N7L6QiZ$O}XS7)}srmIX9t1Cjti7K+!XJPd9DoTBI!Qgk{8 zA|W!_F~Q$jbY;XiARGk=31mEd%FM)>i5eA|9xFvE;>k%C ziQ?H=WTjar5=1#s_iSBx#DyFS1py#jfcG+31AGgDw)lXyBkGg8kpO5O0vjWUJY$jM zNhQvP_aiRNE6hj`#4;8F7VpF1I(CGkL>Uq`=A;|)3k~vJO>UczK=hf%a5VNZBT$nV zl#lxhB$#8DrE=092$oBN2qf$w&qEs)&EmfhoB!85_*%cV9K&(Ib;=9DZ`fyq=v5=o9-!Wg(|(Fu@U{=5jv=Eg7*DxiCFA9fK6;#rQXoI5%xw0dSb%a9f=u@P3OhWbp*9kvtw+B!j{ z;a>p%fB++_wa|hSOcx+_{u)t&(!HFami^U2E&^8$n%wIsdzjqr^LWecQdLci_ zf)lcAtRPDw8%9nNETvs;7RCn#g{2}-TQLc;V%QwnU;lPfvQ2pgc`?Gfi*Y!y8*Y5` zWVI2?A}=8|aB{Tx5EIB!i1PqHV5uANBJjJU4^(?9W=<>0)=wdyDNpU7Q5F7&Eb<-0 Ng$TkBTgVF9KLIxb!L9%R literal 0 HcmV?d00001 diff --git a/crates/soapberry-zip/assets/readme.zip b/crates/soapberry-zip/assets/readme.zip new file mode 100644 index 
0000000000000000000000000000000000000000..5642a67e77d5f5a45c92ea701801ae0b993f4724 GIT binary patch literal 1886 zcmZ{kdrVVT9LF!V3}{!Ui6aJx9zjJmXxSX1WJM9mLkg{cIvKj%wx{jY+k5JLv@kYN z!5JNo4aFBh<3P!DBFwoztb=VNPNqXQ6{aRQT~vG!dePQq(h46jfkI%g8e zQVuF8O1`*?@=TZJjr;uWq3w!-v;}N_RP?Gf^!Rp8n9Jq|ixOs_FG}=jWzwyBn5iYU=NPvm|7(PkVE$ zn%di$xNlF2X7Iz>ccTV36;$mSJ1(qRz3_GQ#iaMwnGc>DNSyodrP{_%?qmn>Ym_`BcAr;QcBZ%&iP<^W$pOL zM{9j4>rA~l6iev9+|0J$w{5-ljyF7=y6UM& zJm!lDZ!eB&(j!gtyz;8!RfBmA8IafH4_xjXIGFxwvGU}B+54D{2P_-6bfvByNbTZg zYEG9NYB-vdFO+)KO}6`|nkx(HT*IyXr^ zb^HgqOO3wPq2SAXb#rneMv5o4JvP34HlnBI=;Pp$q}mNz<3{4bQ};JSovnWG+B9qA z`L`nMm%5od^-K4rKX)VOUiZCa6BcZ#+EovCv*1kL;g;FHVQ zY{{E$Hhk&TSBEcoFZ1oX<^AJhEn}y}rX+ZV+siLrZ&J?L-d4B5xQgHL@!m?d<5)~^ z*Z%K)3sobPcVm7D-Ms(y`dgnq9J)SqM@h-LjT^3s4&Dr}Qa8&AGwPkgOp5G~&;#m~W^oaxwzIFVP8sfF#}u zffd;x;Gr7{jEML$U`M5NQ(^O z0Bu2%2%N|ZLGzUBk^a2`ij1{u-J}z1m zp-rRuS9wbr;3bYD1sn)RK|%r2p8bJ4Ay`{c!>CbwxjCX8Jq=Z9s(O9h&*GFzGk2y8VRA4d{ zkwlVXmoNryT66+rmw#{|Yy`tYt4!Mrl7l#cAs&K}tq=*Me!Al@Zc9gPMJ`MaPRAex zdNKY@B+gA+R{|VnINVkz3B2DT3|TyZYc!9C7Kvdo2zDO1M)?=OKOn%!YAv+j1TzGP z9eIkzI4m$3MsOmD#bU7n0<=v=1oIb=?(f}3mC#{Lv0lhevfzX)8!O0?$cB-V1WRd` zn}zX#L1C%L(^gD^tQa;&_Se7N)NE6pL0*jT?qVEH?1md3Jy~tUvdBwF4V)Y;KEwo$ x6yiL<4_N9(ya@a*=>yfCh?&!hvh`ERHRXvNG^)b?kVXE-a3O*)#1^uG_D`k1y14)V literal 0 HcmV?d00001 diff --git a/crates/soapberry-zip/assets/symlink.zip b/crates/soapberry-zip/assets/symlink.zip new file mode 100644 index 0000000000000000000000000000000000000000..af846938cde293ccc3dfb310fdfbda641382dd3f GIT binary patch literal 173 zcmWIWW@h1H00D{l&JGuU&FkX?vO$=gL588YGB+nPFFQ1ZlYv@nk^pZ;COKwYW=Vjo0E7PvK@{9%R*1=HrUrPkvVoK_0--OE_5yJj E01XfzDgXcg literal 0 HcmV?d00001 diff --git a/crates/soapberry-zip/assets/time-7zip.zip b/crates/soapberry-zip/assets/time-7zip.zip new file mode 100644 index 0000000000000000000000000000000000000000..4f74819d11dbe46d53897bcd0569bb14c9e13639 GIT binary patch literal 150 zcmWIWW@h1H0D;<-@!nttl;8l;C8@F9Ko|w2)xbIc^X400 literal 0 HcmV?d00001 diff --git 
a/crates/soapberry-zip/assets/time-go.zip b/crates/soapberry-zip/assets/time-go.zip new file mode 100644 index 0000000000000000000000000000000000000000..f008805fa42c982a0e28e5abe025425dbc1a9ad9 GIT binary patch literal 148 zcmWIWW@Zs#;9y{2s972B4W!_JgMpKwB(=CiucV?RG=!CbvDfowWPmq2NG%)$cr!AI bFyJ--t`N+?VjL?QNQMyz?SV81GcW)E?OYRQ literal 0 HcmV?d00001 diff --git a/crates/soapberry-zip/assets/time-infozip.zip b/crates/soapberry-zip/assets/time-infozip.zip new file mode 100644 index 0000000000000000000000000000000000000000..8e6394891f0f1000d5aff4c14fff25659a00ac7c GIT binary patch literal 166 zcmWIWW@h1H0D;<-@!nttl;B{HVJJy0F3~HgCpPx6NtkA0Hc^2-v9sr literal 0 HcmV?d00001 diff --git a/crates/soapberry-zip/assets/time-osx.zip b/crates/soapberry-zip/assets/time-osx.zip new file mode 100644 index 0000000000000000000000000000000000000000..e82c5c229e0917b8e33029e7666e755961ab9e48 GIT binary patch literal 142 zcmWIWW@h1H0D;<-@!nttl;B_xU?@o~F3~HgC<%?=VYu$~GqTt7XXIB#5rzP7MkY~a iT>5xm#yBi#1Thh&aKKFo@MdKL$uR<<6Oc9oaTowu0U8JZ literal 0 HcmV?d00001 diff --git a/crates/soapberry-zip/assets/time-win7.zip b/crates/soapberry-zip/assets/time-win7.zip new file mode 100644 index 0000000000000000000000000000000000000000..8ba222b224674153fb65b5be87ca89f434fcb110 GIT binary patch literal 114 zcmWIWW@Zs#0D;<-@!nttl;8l;C8@F9Ko|w2)xbIc<#roA literal 0 HcmV?d00001 diff --git a/crates/soapberry-zip/assets/time-winzip.zip b/crates/soapberry-zip/assets/time-winzip.zip new file mode 100644 index 0000000000000000000000000000000000000000..f6e8f8ba067e462fe7a9727159390919571b8270 GIT binary patch literal 150 zcmWIWW@h1H0D;<-@!nttl;8l;C8@jkM&B_K+#0Z2@Kw1r~0|1@$8R7r{ literal 0 HcmV?d00001 diff --git a/crates/soapberry-zip/assets/unix.zip b/crates/soapberry-zip/assets/unix.zip new file mode 100644 index 0000000000000000000000000000000000000000..ce1a981b2806d7e7a4026383622bf033aac426a4 GIT binary patch literal 620 zcmWIWW@h1H0D+!>4*T9e!nGVgHVCsa$S`E2=H%puhHx@4ujqc@7dHEWUugw510%}| 
zW(Ec@QJ!CvlcK=O6#zG8CeWC9v+JtZfJT5YJJ6Vv%p(1y#3Hakhkynd%)&4zEk7T{ z80NqZd!TMO;DQ>Hnp;p(sSh@(t>=Ls2%|X(;glmlr$hT_XGm26ZQ}Xk2 zD#0cQ0Ck_l^i*bUL4Hw5VqOW@MT|^x%(y~G0_;9UAi1p(#DsO=)pfJN@7-m>O3}avrVEFIY2Q>^9azOL2h8n_gnBfL<93z8D<0YVZh)@KY Y1`0(C*Rg`)o`D4j&49t9016@o0NYf3UjP6A literal 0 HcmV?d00001 diff --git a/crates/soapberry-zip/assets/winxp.zip b/crates/soapberry-zip/assets/winxp.zip new file mode 100644 index 0000000000000000000000000000000000000000..3919322f0c5f8be8f1a214af712b6e86b4d04aef GIT binary patch literal 412 zcmWIWW@h1H0D+!>4*T9e!nGVgHVCrq~yGK=(+5{uIE^HG#C2X@#4W#Is17f5MpZb3<EG$%FNt?{GyV?yb`dg^b>%*JLMKe)obQ>FfgY#Nc!n~3 zGWsmC$cFihkI1D@-9^IQUv@AAcr!BTGV7w4^dAb?7<7Q*5U`{XL_^GFWDo$`1QG$+ z2m+xYtPIS5f>i@bE4UdLS#~KhF|c$9GXTwJV}qHZ%K)+m0vOTg1SsDFN(1$=gP1Fz Te31G8Z&r}!7+~%L(F_a#G1EK) literal 0 HcmV?d00001 diff --git a/crates/soapberry-zip/assets/zip64.zip b/crates/soapberry-zip/assets/zip64.zip new file mode 100644 index 0000000000000000000000000000000000000000..a2ee1fa33dca48e1ec8dfc7507640bfa09bddeb6 GIT binary patch literal 242 zcmWIWW@Zs#U|`^2Feu@2tb6`HQw7KaVKyKRa&>g^b>%*JLMKe)obQ>FfgY#Nc!n~3 zGWsmC$cFihkI1D@-9^IQUv@AAcr!BTGV7w4^dAb?7(g~az>-D~4KbIIK>%zMNCadf u2n2YuvFSjV47xxF1B_4xjP`)?VKh)5J4k2(lDYtIR*)wcVD13X3=9B3ZZ^#T literal 0 HcmV?d00001 diff --git a/crates/soapberry-zip/src/archive.rs b/crates/soapberry-zip/src/archive.rs new file mode 100644 index 0000000..b7f9c3a --- /dev/null +++ b/crates/soapberry-zip/src/archive.rs @@ -0,0 +1,1998 @@ +use crate::crc::crc32_chunk; +use crate::errors::{Error, ErrorKind}; +use crate::extra_fields::{ExtraFieldId, ExtraFields}; +use crate::mode::{ + CREATOR_FAT, CREATOR_MACOS, CREATOR_NTFS, CREATOR_UNIX, CREATOR_VFAT, EntryMode, + msdos_mode_to_file_mode, unix_mode_to_file_mode, +}; +use crate::path::{RawPath, ZipFilePath}; +use crate::reader_at::{FileReader, MutexReader, RangeReader, ReaderAt, ReaderAtExt}; +use crate::time::{ZipDateTimeKind, extract_best_timestamp}; +use crate::utils::{le_u16, le_u32, le_u64}; +use 
crate::{EndOfCentralDirectory, EndOfCentralDirectoryRecordFixed, ZipLocator}; +use std::io::{Read, Seek, Write}; + +pub(crate) const END_OF_CENTRAL_DIR_SIGNATURE64: u32 = 0x06064b50; +pub(crate) const END_OF_CENTRAL_DIR_LOCATOR_SIGNATURE: u32 = 0x07064b50; +pub(crate) const CENTRAL_HEADER_SIGNATURE: u32 = 0x02014b50; +/// The recommended buffer size to use when reading from a zip file. +/// +/// This buffer size was chosen as it can hold an entire central directory +/// record as the spec states (4.4.10): +/// +/// > the combined length of any directory and these three fields SHOULD NOT +/// > generally exceed 65,535 bytes. +pub const RECOMMENDED_BUFFER_SIZE: usize = 1 << 16; + +/// Represents a Zip archive that operates on an in-memory data. +/// +/// A [`ZipSliceArchive`] is more efficient and easier to use than a [`ZipArchive`], +/// as there is no buffer management and memory copying involved. +/// +/// # Examples +/// +/// ```rust +/// use soapberry_zip::{ZipArchive, ZipSliceArchive, Error}; +/// +/// fn process_zip_slice(data: &[u8]) -> Result<(), Error> { +/// let archive = ZipArchive::from_slice(data)?; +/// println!("Found {} entries.", archive.entries_hint()); +/// for entry_result in archive.entries() { +/// let entry = entry_result?; +/// println!("File: {}", entry.file_path().try_normalize()?.as_ref()); +/// } +/// Ok(()) +/// } +/// ``` +#[derive(Debug, Clone)] +pub struct ZipSliceArchive> { + data: T, + eocd: EndOfCentralDirectory, +} + +impl> ZipSliceArchive { + pub(crate) fn new(data: T, eocd: EndOfCentralDirectory) -> Self { + ZipSliceArchive { data, eocd } + } + + /// Returns an iterator over the entries in the central directory of the archive. 
+ pub fn entries(&self) -> ZipSliceEntries<'_> { + let data = self.data.as_ref(); + let directory_start = self.eocd.directory_offset(); + let entry_data = &data[(directory_start as usize)..self.eocd.head_eocd_offset() as usize]; + ZipSliceEntries { + entry_data, + base_offset: self.eocd.base_offset(), + current_offset: directory_start, + } + } + + /// Returns the byte slice that represents the zip file. + /// + /// This will include the entire input slice. + pub fn as_bytes(&self) -> &[u8] { + self.data.as_ref() + } + + /// Returns a hint for the total number of entries in the archive. + /// + /// This value is read from the End of Central Directory record. + pub fn entries_hint(&self) -> u64 { + self.eocd.entries() + } + + /// Returns the offset of the End of Central Directory (EOCD) signature. + /// + /// See [`ZipArchive::eocd_offset()`] for more details. + pub fn eocd_offset(&self) -> u64 { + self.eocd.tail_eocd_offset() + } + + /// The declared offset of the start of the central directory. + /// + /// See [`ZipArchive::directory_offset()`] for more details. + pub fn directory_offset(&self) -> u64 { + self.eocd.directory_offset() + } + + /// Returns the offset where the ZIP archive ends. + /// + /// See [`ZipArchive::end_offset`] for more details. + pub fn end_offset(&self) -> u64 { + self.eocd.tail_eocd_offset() + + EndOfCentralDirectoryRecordFixed::SIZE as u64 + + self.comment().as_bytes().len() as u64 + } + + /// The comment of the zip file. + pub fn comment(&self) -> ZipStr<'_> { + let data = self.data.as_ref(); + let comment_start = + self.eocd.tail_eocd_offset() as usize + EndOfCentralDirectoryRecordFixed::SIZE; + let comment_len = self.eocd.comment_len(); + ZipStr::new(&data[comment_start..comment_start + comment_len]) + } + + /// Converts the [`ZipSliceArchive`] into a general [`ZipArchive`]. + /// + /// This is useful for unifying code that might handle both slice-based + /// and reader-based archives. 
+ #[deprecated(note = "Use `ZipSliceArchive::into_zip_archive` instead")] + pub fn into_reader(self) -> ZipArchive { + ZipArchive { + reader: self.data, + eocd: self.eocd, + } + } + + /// Converts the [`ZipSliceArchive`] into a general [`ZipArchive`]. + /// + /// This is useful for unifying code that might handle both slice-based and + /// reader-based archives. The data is wrapped in a [`std::io::Cursor`] to + /// provide the [`ReaderAt`] implementation needed for [`ZipArchive`]. + pub fn into_zip_archive(self) -> ZipArchive> { + ZipArchive { + reader: std::io::Cursor::new(self.data), + eocd: self.eocd, + } + } + + /// Seeks to the given file entry in the zip archive. + /// + /// See [`ZipArchive::get_entry`] for more details. The biggest difference + /// between the reader and slice APIs is that the slice APIs will eagerly + /// validate that the entire compressed data is present. + pub fn get_entry(&self, entry: ZipArchiveEntryWayfinder) -> Result, Error> { + let data = self.data.as_ref(); + let header = &data[(entry.local_header_offset as usize).min(data.len())..]; + let file_header = ZipLocalFileHeaderFixed::parse(header)?; + let variable_length = file_header.variable_length(); + + let header_size = (ZipLocalFileHeaderFixed::SIZE + variable_length) as u32; + let (total_size, o1) = + (u64::from(header_size)).overflowing_add(entry.compressed_size_hint()); + + if o1 || (header.len() as u64) < total_size { + return Err(Error::from(ErrorKind::Eof)); + } + + let (entire_entry, rest) = header.split_at(total_size as usize); + + let expected_crc = if entry.has_data_descriptor { + DataDescriptor::parse(rest)?.crc + } else { + entry.crc + }; + + Ok(ZipSliceEntry { + data: entire_entry, + verifier: ZipVerification { + crc: expected_crc, + uncompressed_size: entry.uncompressed_size_hint(), + }, + local_header_offset: entry.local_header_offset, + data_start_offset: header_size, + }) + } +} + +/// Represents a single entry (file or directory) within a `ZipSliceArchive`. 
+/// +/// It provides access to the raw compressed data of the entry. +#[derive(Debug, Clone)] +pub struct ZipSliceEntry<'a> { + // From local header offset to end of compressed data + data: &'a [u8], + verifier: ZipVerification, + local_header_offset: u64, + // self.data[self.data_start_offset] is the start of compressed data + data_start_offset: u32, +} + +impl<'a> ZipSliceEntry<'a> { + /// Returns the raw, compressed data of the entry as a byte slice. + pub fn data(&self) -> &'a [u8] { + &self.data[self.data_start_offset as usize..] + } + + /// Returns a verifier for the CRC and uncompressed size of the entry. + /// + /// Useful when it's more practical to oneshot decompress the data, + /// otherwise use [`ZipSliceEntry::verifying_reader`] to stream + /// decompression and verification. + pub fn claim_verifier(&self) -> ZipVerification { + self.verifier + } + + /// Returns a reader that wraps a decompressor and verify the size and CRC + /// of the decompressed data once finished. + pub fn verifying_reader(&self, reader: D) -> ZipSliceVerifier + where + D: std::io::Read, + { + ZipSliceVerifier { + reader, + verifier: self.verifier, + crc: 0, + size: 0, + } + } + + /// Returns the byte range of the compressed data within the archive. + /// + /// See [`ZipEntry::compressed_data_range`] for more details. + pub fn compressed_data_range(&self) -> (u64, u64) { + let compressed_data_start = self.local_header_offset + self.data_start_offset as u64; + let compressed_data_end = + compressed_data_start + (self.data.len() - self.data_start_offset as usize) as u64; + (compressed_data_start, compressed_data_end) + } + + /// Returns an iterator over the extra fields from the local file header. + /// + /// See [`ZipLocalFileHeader`] for more details. 
+ pub fn extra_fields(&self) -> ExtraFields<'_> { + let header = + ZipLocalFileHeaderFixed::parse(self.data).expect("header has already been parsed"); + let file_name_len = header.file_name_len as usize; + let extra_field_len = header.extra_field_len as usize; + let extra_field_start = ZipLocalFileHeaderFixed::SIZE + file_name_len; + let extra_field_end = extra_field_start + extra_field_len; + ExtraFields::new(&self.data[extra_field_start..extra_field_end]) + } + + /// Returns the file path from the local file header. + /// + /// See [`ZipLocalFileHeader`] for more details. + pub fn file_path(&self) -> ZipFilePath> { + let header = + ZipLocalFileHeaderFixed::parse(self.data).expect("header has already been parsed"); + let file_name_len = header.file_name_len as usize; + let filename_start = ZipLocalFileHeaderFixed::SIZE; + let filename_end = filename_start + file_name_len; + ZipFilePath::from_bytes(&self.data[filename_start..filename_end]) + } +} + +/// Verifies the wrapped reader returns the expected CRC and uncompressed size +#[derive(Debug, Clone)] +pub struct ZipSliceVerifier { + reader: D, + crc: u32, + size: u64, + verifier: ZipVerification, +} + +impl ZipSliceVerifier { + /// Consumes the `ZipSliceVerifier`, returning the underlying reader. + pub fn into_inner(self) -> D { + self.reader + } +} + +impl std::io::Read for ZipSliceVerifier +where + D: std::io::Read, +{ + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + let read = self.reader.read(buf)?; + self.crc = crc32_chunk(&buf[..read], self.crc); + self.size += read as u64; + + if read == 0 || self.size >= self.verifier.size() { + self.verifier + .valid(ZipVerification { + crc: self.crc, + uncompressed_size: self.size, + }) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; + } + + Ok(read) + } +} + +/// An iterator over the central directory file header records. +/// +/// Created from [`ZipSliceArchive::entries`]. 
+#[derive(Debug, Clone)] +pub struct ZipSliceEntries<'data> { + entry_data: &'data [u8], + base_offset: u64, + current_offset: u64, +} + +impl<'data> ZipSliceEntries<'data> { + /// Yield the next zip file entry in the central directory if there is any + #[inline] + pub fn next_entry(&mut self) -> Result>, Error> { + if self.entry_data.is_empty() { + return Ok(None); + } + + let file_header = ZipFileHeaderFixed::parse(self.entry_data)?; + let Some((file_name, extra_field, file_comment, entry_data)) = + file_header.parse_variable_length(&self.entry_data[ZipFileHeaderFixed::SIZE..]) + else { + return Err(Error::from(ErrorKind::Eof)); + }; + + let mut entry = ZipFileHeaderRecord::from_parts( + file_header, + file_name, + extra_field, + file_comment, + self.current_offset, + ); + entry.local_header_offset += self.base_offset; + self.current_offset += (self.entry_data.len() - entry_data.len()) as u64; + self.entry_data = entry_data; + Ok(Some(entry)) + } +} + +impl<'data> Iterator for ZipSliceEntries<'data> { + type Item = Result, Error>; + + #[inline] + fn next(&mut self) -> Option { + self.next_entry().transpose() + } +} + +/// The main entrypoint for reading a Zip archive. +/// +/// It can be created from a slice, a file, or any `Read + Seek` source. +/// +/// # Examples +/// +/// Creating from a file: +/// +/// ```rust +/// # use soapberry_zip::{ZipArchive, Error, RECOMMENDED_BUFFER_SIZE}; +/// # use std::fs::File; +/// # use std::io; +/// fn example_from_file(file: File) -> Result<(), Error> { +/// let mut buffer = vec![0u8; RECOMMENDED_BUFFER_SIZE]; +/// let archive = ZipArchive::from_file(file, &mut buffer)?; +/// Ok(()) +/// } +/// ``` +/// +/// For more complex use cases, use the [`ZipLocator`] to locate an archive. +#[derive(Debug, Clone)] +pub struct ZipArchive { + reader: R, + eocd: EndOfCentralDirectory, +} + +impl ZipArchive<()> { + /// Creates a [`ZipLocator`] configured with a maximum search space for the + /// End of Central Directory Record (EOCD). 
+ pub fn with_max_search_space(max_search_space: u64) -> ZipLocator { + ZipLocator::new().max_search_space(max_search_space) + } + + /// Parses an archive from in-memory data. + pub fn from_slice>(data: T) -> Result, Error> { + ZipLocator::new().locate_in_slice(data).map_err(|(_, e)| e) + } + + /// Parses an archive from a file by reading the End of Central Directory. + /// + /// A buffer is required to read parts of the file. + /// [`RECOMMENDED_BUFFER_SIZE`] can be used to construct this buffer. + pub fn from_file( + file: std::fs::File, + buffer: &mut [u8], + ) -> Result, Error> { + ZipLocator::new() + .locate_in_file(file, buffer) + .map_err(|(_, e)| e) + } + + /// Parses an archive from a seekable reader. + /// + /// Prefer [`ZipArchive::from_file`] and [`ZipArchive::from_slice`] when + /// possible, as they are more efficient due to not wrapping the underlying + /// reader in a mutex to support positioned io. + /// + /// ```rust + /// # use soapberry_zip::{ZipArchive, Error, RECOMMENDED_BUFFER_SIZE, ZipFileHeaderRecord}; + /// # use std::io::Cursor; + /// fn example(zip_data: &[u8]) -> Result<(), Error> { + /// let mut buffer = vec![0u8; RECOMMENDED_BUFFER_SIZE]; + /// let archive = ZipArchive::from_seekable(Cursor::new(zip_data), &mut buffer)?; + /// Ok(()) + /// } + /// ``` + pub fn from_seekable( + mut reader: R, + buffer: &mut [u8], + ) -> Result>, Error> + where + R: Read + Seek, + { + let end_offset = reader.seek(std::io::SeekFrom::End(0))?; + let reader = MutexReader::new(reader); + ZipLocator::new() + .locate_in_reader(reader, buffer, end_offset) + .map_err(|(_, e)| e) + } +} + +impl ZipArchive { + pub(crate) fn new(reader: R, eocd: EndOfCentralDirectory) -> Self { + ZipArchive { reader, eocd } + } + + /// Returns a reference to the underlying reader. + pub fn get_ref(&self) -> &R { + &self.reader + } + + /// Consumes this archive and returns the underlying reader. 
+ pub fn into_inner(self) -> R { + self.reader + } + + /// Returns a lending iterator over the entries in the central directory of + /// the archive. + /// + /// Requires a mutable buffer to read directory entries from the underlying + /// reader. + /// + /// ```rust + /// # use soapberry_zip::{ZipArchive, Error, RECOMMENDED_BUFFER_SIZE, ZipFileHeaderRecord}; + /// # use std::fs::File; + /// fn example(file: File) -> Result<(), Error> { + /// let mut buffer = vec![0u8; RECOMMENDED_BUFFER_SIZE]; + /// let archive = ZipArchive::from_file(file, &mut buffer)?; + /// let entries_hint = archive.entries_hint(); + /// let mut actual_entries = 0; + /// let mut entries_iterator = archive.entries(&mut buffer); + /// while let Some(_) = entries_iterator.next_entry()? { + /// actual_entries += 1; + /// } + /// println!("Found {} entries (hint: {})", actual_entries, entries_hint); + /// Ok(()) + /// } + /// ``` + pub fn entries<'archive, 'buf>( + &'archive self, + buffer: &'buf mut [u8], + ) -> ZipEntries<'archive, 'buf, R> { + ZipEntries { + buffer, + archive: self, + pos: 0, + end: 0, + offset: self.eocd.directory_offset(), + base_offset: self.eocd.base_offset(), + central_dir_end_pos: self.eocd.head_eocd_offset(), + } + } + + /// Returns a hint for the total number of entries in the archive. + /// + /// This value is read from the End of Central Directory record. + pub fn entries_hint(&self) -> u64 { + self.eocd.entries() + } + + /// Returns a Read implementation for the comment of the zip archive. + /// + /// Use [`RangeReader::remaining()`] to get the comment length before + /// reading. It is guaranteed to be less than `u16::MAX`. 
+ /// + /// # Examples + /// + /// ```rust + /// use soapberry_zip::{ZipArchive, ZipStr, RECOMMENDED_BUFFER_SIZE}; + /// use std::io::Read; + /// use std::fs::File; + /// + /// let file = File::open("assets/test.zip")?; + /// let mut buffer = vec![0u8; RECOMMENDED_BUFFER_SIZE]; + /// let archive = ZipArchive::from_file(file, &mut buffer)?; + /// + /// let mut comment_reader = archive.comment(); + /// let comment_len = comment_reader.remaining() as usize; + /// comment_reader.read_exact(&mut buffer[..comment_len])?; + /// + /// let actual = ZipStr::new(&buffer[..comment_len]); + /// let expected = ZipStr::new(b"This is a zipfile comment."); + /// assert_eq!(expected, actual); + /// # Ok::<(), Box>(()) + /// ``` + pub fn comment(&self) -> RangeReader<&R> { + let comment_start = + self.eocd.tail_eocd_offset() + EndOfCentralDirectoryRecordFixed::SIZE as u64; + let comment_end = comment_start + self.eocd.comment_len() as u64; + RangeReader::new(&self.reader, comment_start..comment_end) + } + + /// Returns the offset of the End of Central Directory (EOCD) signature. + /// + /// This is the byte position where the EOCD signature (0x06054b50) was found. + /// Useful for recovery scenarios when dealing with false EOCD signatures or + /// when restarting archive searches from a known position. 
+ /// + /// # Examples + /// + /// ```rust + /// # use soapberry_zip::{ZipArchive, ZipLocator, RECOMMENDED_BUFFER_SIZE}; + /// # use std::fs::File; + /// # fn example() -> Result<(), Box> { + /// # let file = File::open("assets/test.zip")?; + /// # let mut buffer = vec![0u8; RECOMMENDED_BUFFER_SIZE]; + /// let archive = ZipArchive::from_file(file, &mut buffer)?; + /// let eocd_position = archive.eocd_offset(); + /// + /// let locator = ZipLocator::new(); + /// let reader = archive.get_ref(); + /// let maybe_previous = locator.locate_in_reader(reader, &mut buffer, eocd_position); + /// # Ok(()) + /// # } + /// ``` + pub fn eocd_offset(&self) -> u64 { + self.eocd.tail_eocd_offset() + } + + /// The declared offset of the start of the central directory. + /// + /// To verify the validity of this offset, start iterating through the + /// central directory via `entries()`. Ensure no errors are returned on the + /// first entry. + /// + /// This value is useful when calculating the amount of prelude data exists + /// in the data, as it will serve as the upper bound until each file's + /// [`ZipFileHeaderRecord::local_header_offset`] can be examined. + pub fn directory_offset(&self) -> u64 { + self.eocd.directory_offset() + } + + /// Returns the offset where the ZIP archive ends. + /// + /// This returns the position immediately after the last byte of the ZIP + /// archive, including the End of Central Directory record and any comment. + /// This is useful for extracting trailing data. + /// + /// The calculation does not rely on any self reported values from the + /// archive. + /// + /// This can be used in conjunction with the starting offset calculation + /// start offset as shown in [`RangeReader`] to determine the exact byte + /// range (and thus size) of the ZIP archive within a context of a larger + /// file. 
+ pub fn end_offset(&self) -> u64 { + self.eocd.tail_eocd_offset() + + EndOfCentralDirectoryRecordFixed::SIZE as u64 + + self.comment().remaining() + } +} + +impl ZipArchive +where + R: ReaderAt, +{ + /// Seeks to the given file entry in the zip archive. + pub fn get_entry(&self, entry: ZipArchiveEntryWayfinder) -> Result, Error> { + let mut buffer = [0u8; ZipLocalFileHeaderFixed::SIZE]; + self.reader + .read_exact_at(&mut buffer, entry.local_header_offset)?; + + // The central directory is the source of truth so we really only parse + // out the local file header to verify the signature and understand the + // variable length. Not everyone uses this as the source of truth: + // https://labs.redyops.com/index.php/2020/04/30/spending-a-night-reading-the-zip-file-format-specification/ + let file_header = ZipLocalFileHeaderFixed::parse(&buffer)?; + let (body_offset, o1) = entry + .local_header_offset + .overflowing_add(ZipLocalFileHeaderFixed::SIZE as u64); + let (body_offset, o2) = body_offset.overflowing_add(file_header.variable_length() as u64); + let (body_end_offset, o3) = body_offset.overflowing_add(entry.compressed_size); + + if o1 || o2 || o3 { + return Err(Error::from(ErrorKind::Eof)); + } + + Ok(ZipEntry { + archive: self, + entry, + body_offset, + body_end_offset, + }) + } +} + +/// Represents a single entry (file or directory) within a [`ZipArchive`] +#[derive(Debug, Clone)] +pub struct ZipEntry<'archive, R> { + archive: &'archive ZipArchive, + body_offset: u64, + body_end_offset: u64, + entry: ZipArchiveEntryWayfinder, +} + +impl<'archive, R> ZipEntry<'archive, R> +where + R: ReaderAt, +{ + /// Returns a [`ZipReader`] for reading the compressed data of this entry. 
+ pub fn reader(&self) -> ZipReader<&'archive R> { + ZipReader { + entry: self.entry, + range_reader: RangeReader::new( + self.archive.get_ref(), + self.body_offset..self.body_end_offset, + ), + } + } + + /// Returns a reader that wraps a decompressor and verify the size and CRC + /// of the decompressed data once finished. + pub fn verifying_reader(&self, reader: D) -> ZipVerifier + where + D: std::io::Read, + { + ZipVerifier { + reader, + crc: 0, + size: 0, + archive: self.archive.get_ref(), + end_offset: self.body_end_offset, + wayfinder: self.entry, + } + } + + /// Returns a tuple of start and end byte offsets for the compressed data + /// within the underlying reader. + /// + /// This method uses the information from the local file header in its + /// calculations. + /// + /// # Security Usage + /// + /// This method is useful for detecting overlapping entries, which are often + /// used in zip bombs. By comparing the ranges returned by this method + /// across multiple entries, you can identify when entries share compressed + /// data: + /// + /// ```rust + /// # use soapberry_zip::{ZipArchive, Error}; + /// # fn example(data: &[u8]) -> Result<(), Error> { + /// let archive = ZipArchive::from_slice(data)?; + /// let mut ranges = Vec::new(); + /// + /// for entry_result in archive.entries() { + /// let entry = entry_result?; + /// let wayfinder = entry.wayfinder(); + /// if let Ok(zip_entry) = archive.get_entry(wayfinder) { + /// ranges.push(zip_entry.compressed_data_range()); + /// } + /// } + /// + /// // Check for overlapping ranges + /// ranges.sort_by_key(|&(start, _)| start); + /// for window in ranges.windows(2) { + /// let (_, end1) = window[0]; + /// let (start2, _) = window[1]; + /// if end1 > start2 { + /// panic!("Warning: Overlapping entries detected!"); + /// } + /// } + /// # Ok(()) + /// # } + /// ``` + pub fn compressed_data_range(&self) -> (u64, u64) { + (self.body_offset, self.body_end_offset) + } + + /// Returns the local file header 
information. + /// + /// This method reads the local file header to which may differ from the + /// central directory data. Most ZIP tools use the central directory as + /// authoritative, but access to local header data can be useful: + /// + /// The local header may contain: + /// - Additional or different extra fields (richer timestamp data, etc.) + /// - Different filename than the central directory (security concern) + /// + /// The buffer argument must be large enough to hold both the filename and + /// extra fields from the local header or a too small error will be + /// returned. + /// + /// # Examples + /// + /// ```rust + /// # use soapberry_zip::{ZipArchive, RECOMMENDED_BUFFER_SIZE, extra_fields::ExtraFieldId}; + /// # use std::fs::File; + /// # fn example() -> Result<(), Box> { + /// // Test with filename mismatch test fixture + /// let file = File::open("assets/filename_mismatch_test.zip")?; + /// let mut buf = vec![0u8; RECOMMENDED_BUFFER_SIZE]; + /// let archive = ZipArchive::from_file(file, &mut buf)?; + /// + /// let mut entries = archive.entries(&mut buf); + /// let entry_header = entries.next_entry()?.unwrap(); + /// + /// // Central directory shows one filename + /// assert_eq!(entry_header.file_path().as_ref(), b"malware.exe"); + /// let wayfinder = entry_header.wayfinder(); + /// let entry = archive.get_entry(wayfinder)?; + /// + /// // Read the local header + /// let mut local_buffer = vec![0u8; 1024]; + /// let local_header = entry.local_header(&mut local_buffer)?; + /// + /// // Local header shows different filename + /// assert_eq!(local_header.file_path().as_ref(), b"safe_file.txt"); + /// + /// // Access extra fields from local header + /// let mut found_fields = 0; + /// for (field_id, _data) in local_header.extra_fields() { + /// found_fields += 1; + /// // Could check for specific extra field types here + /// println!("Found extra field: {:04x}", field_id.as_u16()); + /// } + /// # Ok(()) + /// # } + /// ``` + pub fn 
local_header<'a>(&self, buffer: &'a mut [u8]) -> Result, Error> { + let mut header_buffer = [0u8; ZipLocalFileHeaderFixed::SIZE]; + + // Read the local file header + self.archive + .get_ref() + .read_exact_at(&mut header_buffer, self.entry.local_header_offset)?; + + let local_header_fixed = + ZipLocalFileHeaderFixed::parse(&header_buffer).expect("header has already been parsed"); + let file_name_len = local_header_fixed.file_name_len as usize; + let extra_field_len = local_header_fixed.extra_field_len as usize; + let total_variable_len = file_name_len + extra_field_len; + + // Check if buffer is large enough for both filename and extra fields + if buffer.len() < total_variable_len { + return Err(Error::from(ErrorKind::BufferTooSmall)); + } + + let variable_data = &mut buffer[..total_variable_len]; + let variable_data_offset = + self.entry.local_header_offset + ZipLocalFileHeaderFixed::SIZE as u64; + self.archive + .get_ref() + .read_exact_at(variable_data, variable_data_offset)?; + + let (filename_data, extra_field_data) = variable_data.split_at(file_name_len); + Ok(ZipLocalFileHeader { + file_path: ZipFilePath::from_bytes(filename_data), + extra_fields: ExtraFields::new(extra_field_data), + }) + } +} + +/// Holds the expected CRC32 checksum and uncompressed size for a Zip entry. +/// +/// This struct is used to verify the integrity of decompressed data. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct ZipVerification { + pub crc: u32, + pub uncompressed_size: u64, +} + +impl ZipVerification { + /// Returns the expected CRC32 checksum. + pub fn crc(&self) -> u32 { + self.crc + } + + /// Returns the expected uncompressed size. + pub fn size(&self) -> u64 { + self.uncompressed_size + } + + /// Validates the size and CRC of the entry. + /// + /// This function will return an error if the size or CRC does not match + /// the expected values. 
+ pub fn valid(&self, rhs: ZipVerification) -> Result<(), Error> { + if self.size() != rhs.size() { + return Err(Error::from(ErrorKind::InvalidSize { + expected: self.size(), + actual: rhs.size(), + })); + } + + // If the CRC is 0, then it is not verified. + if self.crc() != 0 && self.crc() != rhs.crc() { + return Err(Error::from(ErrorKind::InvalidChecksum { + expected: self.crc(), + actual: rhs.crc(), + })); + } + + Ok(()) + } +} + +/// Verifies the checksum of the decompressed data matches the checksum listed in the zip +#[derive(Debug, Clone)] +pub struct ZipVerifier { + reader: Decompressor, + crc: u32, + size: u64, + archive: ReaderAt, + end_offset: u64, + wayfinder: ZipArchiveEntryWayfinder, +} + +impl ZipVerifier { + /// Consumes the [`ZipVerifier`], returning the underlying decompressor. + pub fn into_inner(self) -> Decompressor { + self.reader + } +} + +impl std::io::Read for ZipVerifier +where + Decompressor: std::io::Read, + Reader: ReaderAt, +{ + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + let read = self.reader.read(buf)?; + self.crc = crc32_chunk(&buf[..read], self.crc); + self.size += read as u64; + + if read == 0 || self.size >= self.wayfinder.uncompressed_size_hint() { + let crc = if self.wayfinder.has_data_descriptor { + DataDescriptor::read_at(&self.archive, self.end_offset).map(|x| x.crc) + } else { + Ok(self.crc) + }; + + crc.and_then(|crc| { + let expected = ZipVerification { + crc: self.crc, + uncompressed_size: self.wayfinder.uncompressed_size_hint(), + }; + + expected.valid(ZipVerification { + crc, + uncompressed_size: self.size, + }) + }) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; + } + + Ok(read) + } +} + +/// A reader for a Zip entry's compressed data. 
+#[derive(Debug, Clone)] +pub struct ZipReader { + entry: ZipArchiveEntryWayfinder, + range_reader: RangeReader, +} + +impl ZipReader +where + R: ReaderAt, +{ + /// Returns an object that can be used to verify the size and checksum of + /// inflated data + /// + /// Consumes the reader, so this should be called after all data has been read from the entry. + /// + /// The function will read the data descriptor if one is expected to exist. + pub fn claim_verifier(self) -> Result { + let expected_size = self.entry.uncompressed_size_hint(); + + let expected_crc = if self.entry.has_data_descriptor { + let end_offset = self.range_reader.end_offset(); + let archive = self.range_reader.into_inner(); + DataDescriptor::read_at(archive, end_offset).map(|x| x.crc)? + } else { + self.entry.crc + }; + + Ok(ZipVerification { + crc: expected_crc, + uncompressed_size: expected_size, + }) + } +} + +impl Read for ZipReader +where + R: ReaderAt, +{ + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + self.range_reader.read(buf) + } +} + +/// Local file header information from a ZIP archive entry. +/// +/// This struct provides access to data stored in the local file header of a ZIP entry, +/// which may differ from the information in the central directory. The local header +/// contains the filename and extra fields as they appear at the start of each entry's +/// data within the ZIP file. +/// +/// Most ZIP tools use the central directory as authoritative, but access to local +/// header data is useful for validation, security analysis, and forensic purposes. +#[derive(Debug)] +pub struct ZipLocalFileHeader<'a> { + file_path: ZipFilePath>, + extra_fields: ExtraFields<'a>, +} + +impl<'a> ZipLocalFileHeader<'a> { + /// Returns the file path from the local file header. + /// + /// This may differ from the central directory file path. + pub fn file_path(&self) -> ZipFilePath> { + self.file_path + } + + /// Returns an iterator over the extra fields from the local file header. 
+ /// + /// Extra fields in the local header may differ from those in the central directory. + /// The local header may contain additional or different metadata compared to the + /// central directory entry. + pub fn extra_fields(&self) -> ExtraFields<'a> { + self.extra_fields + } +} + +#[derive(Debug, Clone)] +pub(crate) struct DataDescriptor { + crc: u32, +} + +impl DataDescriptor { + const SIZE: usize = 8; + pub const SIGNATURE: u32 = 0x08074b50; + + fn parse(data: &[u8]) -> Result { + if data.len() < Self::SIZE { + return Err(Error::from(ErrorKind::Eof)); + } + + let mut pos = 0; + + let potential_signature = le_u32(&data[0..4]); + if potential_signature == Self::SIGNATURE { + pos += 4; + } + + // The crc is followed by the compressed_size and then the + // uncompressed_size but the spec allows for the sizes to be either 4 + // bytes each or 8 bytes in Zip64 mode. (spec 4.3.9.1). They aren't + // needed, so we skip them. + Ok(DataDescriptor { + crc: le_u32(&data[pos..pos + 4]), + }) + } + + fn read_at(reader: R, offset: u64) -> Result + where + R: ReaderAt, + { + let mut buffer = [0u8; Self::SIZE]; + reader.read_exact_at(&mut buffer, offset)?; + Self::parse(&buffer) + } +} + +/// A lending iterator over file header records in a [`ZipArchive`]. +#[derive(Debug)] +pub struct ZipEntries<'archive, 'buf, R> { + buffer: &'buf mut [u8], + archive: &'archive ZipArchive, + pos: usize, + end: usize, + offset: u64, + base_offset: u64, + central_dir_end_pos: u64, +} + +impl ZipEntries<'_, '_, R> +where + R: ReaderAt, +{ + /// Yield the next zip file entry in the central directory if there is any + /// + /// This method reads from the underlying archive reader into the provided + /// buffer to parse entry headers. 
+ #[inline] + pub fn next_entry(&mut self) -> Result>, Error> { + if self.pos + ZipFileHeaderFixed::SIZE >= self.end { + if self.offset >= self.central_dir_end_pos { + return Ok(None); + } + + let remaining = self.end - self.pos; + self.buffer.copy_within(self.pos..self.end, 0); + let max_read = ((self.central_dir_end_pos - self.offset) as usize) + .min(self.buffer.len() - remaining); + let read = self.archive.reader.read_at_least_at( + &mut self.buffer[remaining..][..max_read], + ZipFileHeaderFixed::SIZE, + self.offset, + )?; + self.offset += read as u64; + self.pos = 0; + self.end = remaining + read; + } + + let central_directory_offset = self.offset - (self.end - self.pos) as u64; + let data = &self.buffer[self.pos..self.end]; + let file_header = ZipFileHeaderFixed::parse(data)?; + self.pos += ZipFileHeaderFixed::SIZE; + + let variable_length = file_header.variable_length(); + if self.pos + variable_length > self.end { + // Need to read more data + let remaining = self.end - self.pos; + self.buffer.copy_within(self.pos..self.end, 0); + let max_read = ((self.central_dir_end_pos - self.offset) as usize) + .min(self.buffer.len() - remaining); + let read = self.archive.reader.read_at_least_at( + &mut self.buffer[remaining..][..max_read], + variable_length - remaining, + self.offset, + )?; + self.offset += read as u64; + self.pos = 0; + self.end = remaining + read; + } + + let data = &self.buffer[self.pos..self.end]; + let (file_name, extra_field, file_comment, _) = file_header + .parse_variable_length(data) + .expect("variable length precheck failed"); + let mut file_header = ZipFileHeaderRecord::from_parts( + file_header, + file_name, + extra_field, + file_comment, + central_directory_offset, + ); + file_header.local_header_offset += self.base_offset; + self.pos += variable_length; + Ok(Some(file_header)) + } +} + +/// 4.4.2 +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) struct VersionMadeBy(u16); + +#[allow(dead_code)] +impl VersionMadeBy { + pub fn 
as_u16(&self) -> u16 { + self.0 + } + + /// The (major, minor) ZIP specification version supported by the software + /// used to encode the file. + /// + /// 4.4.2.3: The lower byte, The value / 10 indicates the major version + /// number, and the value mod 10 is the minor version number. + pub fn version(&self) -> (u8, u8) { + let v = (self.0 >> 8) as u8; + (v / 10, v % 10) + } +} + +#[derive(Debug, Clone)] +pub(crate) struct Zip64EndOfCentralDirectory { + pub offset: u64, + pub central_dir_offset: u64, + pub central_dir_size: u64, + pub num_entries: u64, +} + +impl Zip64EndOfCentralDirectory { + #[inline] + pub fn from_parts(offset: u64, record: Zip64EndOfCentralDirectoryRecord) -> Self { + Self { + offset, + central_dir_offset: record.central_dir_offset, + central_dir_size: record.central_dir_size, + num_entries: record.num_entries, + } + } +} + +#[derive(Debug, Clone)] +pub(crate) struct Zip64EndOfCentralDirectoryRecord { + /// zip64 end of central dir signature + pub signature: u32, + + /// size of zip64 end of central directory record + #[allow(dead_code)] + pub size: u64, + + /// version made by + #[allow(dead_code)] + pub version_made_by: VersionMadeBy, + + /// version needed to extract + #[allow(dead_code)] + pub version_needed: u16, + + /// number of this disk + #[allow(dead_code)] + pub disk_number: u32, + + /// number of the disk with the start of the central directory + #[allow(dead_code)] + pub cd_disk: u32, + + /// total number of entries in the central directory on this disk + pub num_entries: u64, + + /// total number of entries in the central directory + #[allow(dead_code)] + pub total_entries: u64, + + /// size of the central directory + pub central_dir_size: u64, + + /// offset of start of central directory with respect to the starting disk number + pub central_dir_offset: u64, + // zip64 extensible data sector + // pub extensible_data: Vec, +} + +impl Zip64EndOfCentralDirectoryRecord { + pub(crate) const SIZE: usize = 56; + + #[inline] + pub fn 
parse(data: &[u8]) -> Result { + if data.len() < Self::SIZE { + return Err(Error::from(ErrorKind::Eof)); + } + + let result = Zip64EndOfCentralDirectoryRecord { + signature: le_u32(&data[0..4]), + size: le_u64(&data[4..12]), + version_made_by: VersionMadeBy(le_u16(&data[12..14])), + version_needed: le_u16(&data[14..16]), + disk_number: le_u32(&data[16..20]), + cd_disk: le_u32(&data[20..24]), + num_entries: le_u64(&data[24..32]), + total_entries: le_u64(&data[32..40]), + central_dir_size: le_u64(&data[40..48]), + central_dir_offset: le_u64(&data[48..56]), + }; + + if result.signature != END_OF_CENTRAL_DIR_SIGNATURE64 { + return Err(Error::from(ErrorKind::InvalidSignature { + expected: END_OF_CENTRAL_DIR_SIGNATURE64, + actual: result.signature, + })); + } + + Ok(result) + } +} + +/// A numeric identifier for a compression method used in a Zip archive. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct CompressionMethodId(u16); + +impl CompressionMethodId { + /// Returns the raw `u16` value of the compression method ID. + #[inline] + pub fn as_u16(&self) -> u16 { + self.0 + } + + /// Converts the numeric ID to a `CompressionMethod` enum. 
+ #[inline] + pub fn as_method(&self) -> CompressionMethod { + match self.0 { + 0 => CompressionMethod::Store, + 1 => CompressionMethod::Shrunk, + 2 => CompressionMethod::Reduce1, + 3 => CompressionMethod::Reduce2, + 4 => CompressionMethod::Reduce3, + 5 => CompressionMethod::Reduce4, + 6 => CompressionMethod::Imploded, + 7 => CompressionMethod::Tokenizing, + 8 => CompressionMethod::Deflate, + 9 => CompressionMethod::Deflate64, + 10 => CompressionMethod::Terse, + 12 => CompressionMethod::Bzip2, + 14 => CompressionMethod::Lzma, + 18 => CompressionMethod::Lz77, + 20 => CompressionMethod::ZstdDeprecated, + 93 => CompressionMethod::Zstd, + 94 => CompressionMethod::Mp3, + 95 => CompressionMethod::Xz, + 96 => CompressionMethod::Jpeg, + 97 => CompressionMethod::WavPack, + 98 => CompressionMethod::Ppmd, + 99 => CompressionMethod::Aes, + _ => CompressionMethod::Unknown(self.0), + } + } +} + +/// The compression method used on an individual Zip archive entry +/// +/// Documented in the spec under: 4.4.5 +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u16)] +pub enum CompressionMethod { + Store = 0, + Shrunk = 1, + Reduce1 = 2, + Reduce2 = 3, + Reduce3 = 4, + Reduce4 = 5, + Imploded = 6, + Tokenizing = 7, + Deflate = 8, + Deflate64 = 9, + Terse = 10, + Bzip2 = 12, + Lzma = 14, + Lz77 = 18, + ZstdDeprecated = 20, + Zstd = 93, + Mp3 = 94, + Xz = 95, + Jpeg = 96, + WavPack = 97, + Ppmd = 98, + Aes = 99, + Unknown(u16), +} + +impl CompressionMethod { + /// Return the numeric id of this compression method. 
+ #[inline] + pub fn as_id(&self) -> CompressionMethodId { + let value = match self { + CompressionMethod::Store => 0, + CompressionMethod::Shrunk => 1, + CompressionMethod::Reduce1 => 2, + CompressionMethod::Reduce2 => 3, + CompressionMethod::Reduce3 => 4, + CompressionMethod::Reduce4 => 5, + CompressionMethod::Imploded => 6, + CompressionMethod::Tokenizing => 7, + CompressionMethod::Deflate => 8, + CompressionMethod::Deflate64 => 9, + CompressionMethod::Terse => 10, + CompressionMethod::Bzip2 => 12, + CompressionMethod::Lzma => 14, + CompressionMethod::Lz77 => 18, + CompressionMethod::ZstdDeprecated => 20, + CompressionMethod::Zstd => 93, + CompressionMethod::Mp3 => 94, + CompressionMethod::Xz => 95, + CompressionMethod::Jpeg => 96, + CompressionMethod::WavPack => 97, + CompressionMethod::Ppmd => 98, + CompressionMethod::Aes => 99, + CompressionMethod::Unknown(id) => *id, + }; + CompressionMethodId(value) + } +} + +impl From for CompressionMethod { + fn from(id: u16) -> Self { + CompressionMethodId(id).as_method() + } +} + +/// A borrowed data from a Zip archive, typically for comments or non-path text. +/// +/// Zip archives may contain text that is not strictly UTF-8. This type +/// represents such text as a byte slice. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct ZipStr<'a>(&'a [u8]); + +impl<'a> ZipStr<'a> { + /// Creates a new `ZipStr` from a byte slice. + #[inline] + pub fn new(data: &'a [u8]) -> Self { + Self(data) + } + + /// Returns the underlying byte slice. + #[inline] + pub fn as_bytes(&self) -> &'a [u8] { + self.0 + } + + /// Converts the borrowed `ZipStr` into an owned `ZipString` by cloning the + /// data. + #[inline] + pub fn into_owned(&self) -> ZipString { + ZipString::new(self.0.to_vec()) + } +} + +/// An owned string (`Vec`) from a Zip archive, typically for comments or non-path text. +/// +/// Similar to `ZipStr`, but owns its data. 
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct ZipString(Vec); + +impl ZipString { + /// Creates a new `ZipString` from a vector of bytes. + #[inline] + pub fn new(data: Vec) -> Self { + Self(data) + } + + /// Returns a borrowed `ZipStr` view of this `ZipString`. + #[inline] + pub fn as_str(&self) -> ZipStr<'_> { + ZipStr::new(self.0.as_slice()) + } +} + +/// Represents a record from the Zip archive's central directory for a single +/// file +/// +/// This contains metadata about the file. If interested in navigating to the +/// file contents, use `[ZipFileHeaderRecord::wayfinder]`. +/// +/// Reference 4.3.12 in the zip specification +#[derive(Debug, Clone)] +#[allow(dead_code)] +pub struct ZipFileHeaderRecord<'a> { + signature: u32, + version_made_by: u16, + version_needed: u16, + flags: u16, + compression_method: CompressionMethodId, + last_mod_time: u16, + last_mod_date: u16, + crc32: u32, + compressed_size: u64, + uncompressed_size: u64, + file_name_len: u16, + extra_field_len: u16, + file_comment_len: u16, + disk_number_start: u32, + internal_file_attrs: u16, + external_file_attrs: u32, + local_header_offset: u64, + central_directory_offset: u64, + file_name: ZipFilePath>, + extra_field: &'a [u8], + file_comment: ZipStr<'a>, + is_zip64: bool, +} + +impl<'a> ZipFileHeaderRecord<'a> { + #[inline] + fn from_parts( + header: ZipFileHeaderFixed, + file_name: &'a [u8], + extra_field: &'a [u8], + file_comment: &'a [u8], + central_directory_offset: u64, + ) -> Self { + let mut result = Self { + signature: header.signature, + version_made_by: header.version_made_by, + version_needed: header.version_needed, + flags: header.flags, + compression_method: header.compression_method, + last_mod_time: header.last_mod_time, + last_mod_date: header.last_mod_date, + crc32: header.crc32, + compressed_size: u64::from(header.compressed_size), + uncompressed_size: u64::from(header.uncompressed_size), + file_name_len: header.file_name_len, + 
extra_field_len: header.extra_field_len, + file_comment_len: header.file_comment_len, + disk_number_start: u32::from(header.disk_number_start), + internal_file_attrs: header.internal_file_attrs, + external_file_attrs: header.external_file_attrs, + local_header_offset: u64::from(header.local_header_offset), + central_directory_offset, + file_name: ZipFilePath::from_bytes(file_name), + extra_field, + file_comment: ZipStr::new(file_comment), + is_zip64: false, + }; + + if result.uncompressed_size != u64::from(u32::MAX) + && result.compressed_size != u64::from(u32::MAX) + && result.local_header_offset != u64::from(u32::MAX) + && result.disk_number_start != u32::from(u16::MAX) + { + return result; + } + + let extra_fields = ExtraFields::new(extra_field); + for (field_id, field_data) in extra_fields { + if field_id != ExtraFieldId::ZIP64 { + continue; + } + + let mut field = field_data; + + result.is_zip64 = true; + + if header.uncompressed_size == u32::MAX { + let Some(uncompressed_size) = field.get(..8).map(le_u64) else { + break; + }; + result.uncompressed_size = uncompressed_size; + field = &field[8..]; + } + + if header.compressed_size == u32::MAX { + let Some(compressed_size) = field.get(..8).map(le_u64) else { + break; + }; + result.compressed_size = compressed_size; + field = &field[8..]; + } + + if header.local_header_offset == u32::MAX { + let Some(local_header_offset) = field.get(..8).map(le_u64) else { + break; + }; + result.local_header_offset = local_header_offset; + field = &field[8..]; + } + + if header.disk_number_start == u16::MAX { + let Some(disk_number_start) = field.get(..4).map(le_u32) else { + break; + }; + result.disk_number_start = disk_number_start; + } + + break; + } + + result + } + + /// Describes if the file is a directory. + /// + /// See [`ZipFilePath::is_dir`] for more information. 
+ #[inline] + pub fn is_dir(&self) -> bool { + self.file_name.is_dir() + } + + /// Returns true if the entry has a data descriptor that follows its + /// compressed data. + /// + /// From the spec (4.3.9.1): + /// + /// > This descriptor MUST exist if bit 3 of the general purpose bit flag is + /// > set + #[inline] + pub fn has_data_descriptor(&self) -> bool { + self.flags & 0x08 != 0 + } + + /// Describes where the file's data is located within the archive. + #[inline] + pub fn wayfinder(&self) -> ZipArchiveEntryWayfinder { + ZipArchiveEntryWayfinder { + uncompressed_size: self.uncompressed_size, + compressed_size: self.compressed_size, + local_header_offset: self.local_header_offset, + has_data_descriptor: self.has_data_descriptor(), + crc: self.crc32, + } + } + + /// The purported number of bytes of the uncompressed data. + /// + /// **WARNING**: this number has not yet been validated, so don't trust it + /// to make allocation decisions. + #[inline] + pub fn uncompressed_size_hint(&self) -> u64 { + self.uncompressed_size + } + + /// The purported number of bytes of the compressed data. + /// + /// **WARNING**: this number has not yet been validated, so don't trust it + /// to make allocation decisions. + #[inline] + pub fn compressed_size_hint(&self) -> u64 { + self.compressed_size + } + + /// The declared offset to the local file header within the Zip archive. + /// + /// To verify the validity of this offset, call + /// [`ZipSliceArchive::get_entry`] or [`ZipArchive::get_entry`]. + /// + /// The minimum of all local header offsets (or `directory_offset()` when a + /// zip is empty), will be the length of prelude data in a zip archive (data + /// that is unrelated to the zip archive). + /// + /// See [`RangeReader`] for an example. 
+ #[inline] + pub fn local_header_offset(&self) -> u64 { + self.local_header_offset + } + + /// The compression method used to compress the data + #[inline] + pub fn compression_method(&self) -> CompressionMethod { + self.compression_method.as_method() + } + + /// Returns the file path in its raw form. + /// + /// # Safety + /// + /// The raw path may contain unsafe components like: + /// - Absolute paths (`/etc/passwd`) + /// - Directory traversal (`../../../etc/passwd`) + /// - Invalid UTF-8 sequences + /// + /// # Example + /// ```rust + /// # use soapberry_zip::ZipArchive; + /// # fn example() -> Result<(), Box> { + /// # let data = include_bytes!("../assets/test.zip"); + /// # let archive = ZipArchive::from_slice(data)?; + /// # let mut entries = archive.entries(); + /// # let entry = entries.next_entry()?.unwrap(); + /// // Get raw path (potentially unsafe) + /// let raw_path = entry.file_path(); + /// + /// // Convert to safe path + /// let safe_path = raw_path.try_normalize()?; + /// println!("Safe path: {}", safe_path.as_ref()); + /// + /// // Check if it's a directory + /// if safe_path.is_dir() { + /// println!("This is a directory"); + /// } + /// # Ok(()) + /// # } + /// ``` + #[inline] + pub fn file_path(&self) -> ZipFilePath> { + self.file_name + } + + /// Returns the last modification date and time. + /// + /// This method parses the extra field data to locate more accurate timestamps. + #[inline] + pub fn last_modified(&self) -> ZipDateTimeKind { + extract_best_timestamp(self.extra_fields(), self.last_mod_time, self.last_mod_date) + } + + /// Returns the file mode information extracted from the external file attributes. 
+ #[inline] + pub fn mode(&self) -> EntryMode { + let creator_version = self.version_made_by >> 8; + + let mut mode = match creator_version { + // Unix and macOS + CREATOR_UNIX | CREATOR_MACOS => unix_mode_to_file_mode(self.external_file_attrs >> 16), + // NTFS, VFAT, FAT + CREATOR_NTFS | CREATOR_VFAT | CREATOR_FAT => { + msdos_mode_to_file_mode(self.external_file_attrs) + }, + // default to basic permissions + _ => 0o644, + }; + + // Check if it's a directory by filename ending with '/' + if self.is_dir() { + mode |= 0o040000; // S_IFDIR + } + + EntryMode::new(mode) + } + + /// The declared CRC32 checksum of the uncompressed data. + /// + /// To verify the validity of this value, [`ZipEntry::verifying_reader`] + /// will return an error if when the decompressed data does not match this + /// checksum. + #[inline] + pub fn crc32(&self) -> u32 { + self.crc32 + } + + /// Returns the offset from the start of reader where this central directory + /// record was parsed from. + #[inline] + pub fn central_directory_offset(&self) -> u64 { + self.central_directory_offset + } + + /// Returns an iterator over the extra fields in this file header record. + /// + /// Extra fields contain additional metadata about files in ZIP archives, + /// such as timestamps, alignment information, and platform-specific data. 
+ /// + /// # Examples + /// + /// ```rust + /// # use soapberry_zip::{ZipArchive, extra_fields::ExtraFieldId}; + /// # fn example(data: &[u8]) -> Result<(), Box> { + /// let archive = ZipArchive::from_slice(data)?; + /// for entry_result in archive.entries() { + /// let entry = entry_result?; + /// let mut extra_fields = entry.extra_fields(); + /// for (field_id, field_data) in extra_fields.by_ref() { + /// match field_id { + /// ExtraFieldId::JAVA_JAR => { + /// println!("Handle jar CAFE field with {} bytes", field_data.len()); + /// } + /// _ => { + /// println!("Found extra field ID: 0x{:04x}", field_id.as_u16()); + /// } + /// } + /// } + /// + /// // If desired, check for truncated data + /// if !extra_fields.remaining_bytes().is_empty() { + /// println!("Warning: Some extra field data was truncated"); + /// } + /// } + /// # Ok(()) + /// # } + /// ``` + /// + /// Raw access to the entire extra field data is available when + /// `remaining_bytes` is called prior to any iteration. + #[inline] + pub fn extra_fields(&self) -> ExtraFields<'_> { + ExtraFields::new(self.extra_field) + } +} + +/// Contains directions to where the Zip entry's data is located within the Zip archive. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ZipArchiveEntryWayfinder {
    // Declared (unvalidated) size of the entry once decompressed.
    uncompressed_size: u64,
    // Declared (unvalidated) size of the entry's compressed data.
    compressed_size: u64,
    // Declared offset of the entry's local file header.
    local_header_offset: u64,
    // Declared CRC32 of the uncompressed data.
    crc: u32,
    // Whether a data descriptor follows the compressed data.
    has_data_descriptor: bool,
}

impl ZipArchiveEntryWayfinder {
    /// Equivalent to [`ZipFileHeaderRecord::uncompressed_size_hint`]
    ///
    /// This is a convenience method to avoid having to deal with lifetime
    /// issues on a `ZipFileHeaderRecord`
    #[inline]
    pub fn uncompressed_size_hint(&self) -> u64 {
        self.uncompressed_size
    }

    /// Equivalent to [`ZipFileHeaderRecord::compressed_size_hint`]
    ///
    /// This is a convenience method to avoid having to deal with lifetime
    /// issues on a `ZipFileHeaderRecord`
    #[inline]
    pub fn compressed_size_hint(&self) -> u64 {
        self.compressed_size
    }
}

/// The fixed-size (30 byte) portion of a local file header; the variable
/// length file name and extra field follow it.
#[derive(Debug, Clone)]
pub(crate) struct ZipLocalFileHeaderFixed {
    pub(crate) signature: u32,
    pub(crate) version_needed: u16,
    pub(crate) flags: u16,
    pub(crate) compression_method: CompressionMethodId,
    pub(crate) last_mod_time: u16,
    pub(crate) last_mod_date: u16,
    pub(crate) crc32: u32,
    pub(crate) compressed_size: u32,
    pub(crate) uncompressed_size: u32,
    pub(crate) file_name_len: u16,
    pub(crate) extra_field_len: u16,
}

impl ZipLocalFileHeaderFixed {
    /// Number of bytes in the fixed portion of a local file header.
    const SIZE: usize = 30;
    /// Signature expected in the first four bytes of a local file header.
    pub const SIGNATURE: u32 = 0x04034b50;

    /// Decodes the fixed portion of a local file header from the start of
    /// `data`.
    ///
    /// Returns an `Eof` error when `data` holds fewer than [`Self::SIZE`]
    /// bytes, and an `InvalidSignature` error when the first four bytes are
    /// not [`Self::SIGNATURE`].
    pub fn parse(data: &[u8]) -> Result<Self, Error> {
        if data.len() < Self::SIZE {
            return Err(Error::from(ErrorKind::Eof));
        }

        // Fields are little-endian and read in on-disk order.
        let result = ZipLocalFileHeaderFixed {
            signature: le_u32(&data[0..4]),
            version_needed: le_u16(&data[4..6]),
            flags: le_u16(&data[6..8]),
            compression_method: CompressionMethodId(le_u16(&data[8..10])),
            last_mod_time: le_u16(&data[10..12]),
            last_mod_date: le_u16(&data[12..14]),
            crc32: le_u32(&data[14..18]),
            compressed_size: le_u32(&data[18..22]),
            uncompressed_size: le_u32(&data[22..26]),
            file_name_len: le_u16(&data[26..28]),
            extra_field_len: le_u16(&data[28..30]),
        };

        if result.signature != Self::SIGNATURE
{ + return Err(Error::from(ErrorKind::InvalidSignature { + expected: Self::SIGNATURE, + actual: result.signature, + })); + } + + Ok(result) + } + + pub fn variable_length(&self) -> usize { + self.file_name_len as usize + self.extra_field_len as usize + } + + pub fn write(&self, mut writer: W) -> Result<(), Error> + where + W: Write, + { + // Batch writes with a fixed size buffer. Improved throughput 25% + let mut buffer = [0u8; 30]; + buffer[..4].copy_from_slice(&self.signature.to_le_bytes()); + buffer[4..6].copy_from_slice(&self.version_needed.to_le_bytes()); + buffer[6..8].copy_from_slice(&self.flags.to_le_bytes()); + buffer[8..10].copy_from_slice(&self.compression_method.0.to_le_bytes()); + buffer[10..12].copy_from_slice(&self.last_mod_time.to_le_bytes()); + buffer[12..14].copy_from_slice(&self.last_mod_date.to_le_bytes()); + buffer[14..18].copy_from_slice(&self.crc32.to_le_bytes()); + buffer[18..22].copy_from_slice(&self.compressed_size.to_le_bytes()); + buffer[22..26].copy_from_slice(&self.uncompressed_size.to_le_bytes()); + buffer[26..28].copy_from_slice(&self.file_name_len.to_le_bytes()); + buffer[28..30].copy_from_slice(&self.extra_field_len.to_le_bytes()); + writer.write_all(&buffer)?; + Ok(()) + } +} + +#[derive(Debug, Clone)] +pub(crate) struct ZipFileHeaderFixed { + pub signature: u32, + pub version_made_by: u16, + pub version_needed: u16, + pub flags: u16, + pub compression_method: CompressionMethodId, + pub last_mod_time: u16, + pub last_mod_date: u16, + pub crc32: u32, + pub compressed_size: u32, + pub uncompressed_size: u32, + pub file_name_len: u16, + pub extra_field_len: u16, + pub file_comment_len: u16, + pub disk_number_start: u16, + pub internal_file_attrs: u16, + pub external_file_attrs: u32, + pub local_header_offset: u32, +} + +impl ZipFileHeaderFixed { + pub fn variable_length(&self) -> usize { + self.file_name_len as usize + self.extra_field_len as usize + self.file_comment_len as usize + } +} + +type VariableFields<'a> = ( + &'a [u8], // 
file_name + &'a [u8], // extra_field + &'a [u8], // file_comment + &'a [u8], // rest of the data +); + +impl ZipFileHeaderFixed { + pub(crate) const SIZE: usize = 46; + + #[inline] + pub fn parse(data: &[u8]) -> Result { + if data.len() < Self::SIZE { + return Err(Error::from(ErrorKind::Eof)); + } + + let result = ZipFileHeaderFixed { + signature: le_u32(&data[0..4]), + version_made_by: le_u16(&data[4..6]), + version_needed: le_u16(&data[6..8]), + flags: le_u16(&data[8..10]), + compression_method: CompressionMethodId(le_u16(&data[10..12])), + last_mod_time: le_u16(&data[12..14]), + last_mod_date: le_u16(&data[14..16]), + crc32: le_u32(&data[16..20]), + compressed_size: le_u32(&data[20..24]), + uncompressed_size: le_u32(&data[24..28]), + file_name_len: le_u16(&data[28..30]), + extra_field_len: le_u16(&data[30..32]), + file_comment_len: le_u16(&data[32..34]), + disk_number_start: le_u16(&data[34..36]), + internal_file_attrs: le_u16(&data[36..38]), + external_file_attrs: le_u32(&data[38..42]), + local_header_offset: le_u32(&data[42..46]), + }; + + if result.signature != CENTRAL_HEADER_SIGNATURE { + return Err(Error::from(ErrorKind::InvalidSignature { + expected: CENTRAL_HEADER_SIGNATURE, + actual: result.signature, + })); + } + + Ok(result) + } + + #[inline] + fn parse_variable_length<'a>(&self, data: &'a [u8]) -> Option> { + if data.len() < self.file_name_len as usize { + return None; + } + let (file_name, rest) = data.split_at(self.file_name_len as usize); + + if rest.len() < self.extra_field_len as usize { + return None; + } + let (extra_field, rest) = rest.split_at(self.extra_field_len as usize); + + if rest.len() < self.file_comment_len as usize { + return None; + } + let (file_comment, rest) = rest.split_at(self.file_comment_len as usize); + + Some((file_name, extra_field, file_comment, rest)) + } + + pub fn write(&self, mut writer: W) -> Result<(), Error> + where + W: Write, + { + // Batch writes with a fixed size buffer. 
Improved throughput 25% + let mut buffer = [0u8; Self::SIZE]; + buffer[0..4].copy_from_slice(&self.signature.to_le_bytes()); + buffer[4..6].copy_from_slice(&self.version_made_by.to_le_bytes()); + buffer[6..8].copy_from_slice(&self.version_needed.to_le_bytes()); + buffer[8..10].copy_from_slice(&self.flags.to_le_bytes()); + buffer[10..12].copy_from_slice(&self.compression_method.0.to_le_bytes()); + buffer[12..14].copy_from_slice(&self.last_mod_time.to_le_bytes()); + buffer[14..16].copy_from_slice(&self.last_mod_date.to_le_bytes()); + buffer[16..20].copy_from_slice(&self.crc32.to_le_bytes()); + buffer[20..24].copy_from_slice(&self.compressed_size.to_le_bytes()); + buffer[24..28].copy_from_slice(&self.uncompressed_size.to_le_bytes()); + buffer[28..30].copy_from_slice(&self.file_name_len.to_le_bytes()); + buffer[30..32].copy_from_slice(&self.extra_field_len.to_le_bytes()); + buffer[32..34].copy_from_slice(&self.file_comment_len.to_le_bytes()); + buffer[34..36].copy_from_slice(&self.disk_number_start.to_le_bytes()); + buffer[36..38].copy_from_slice(&self.internal_file_attrs.to_le_bytes()); + buffer[38..42].copy_from_slice(&self.external_file_attrs.to_le_bytes()); + buffer[42..46].copy_from_slice(&self.local_header_offset.to_le_bytes()); + writer.write_all(&buffer)?; + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Cursor; + + #[test] + pub fn blank_zip_archive() { + let data = [80, 75, 5, 6]; + let mut buf = vec![0u8; RECOMMENDED_BUFFER_SIZE]; + let archive = ZipArchive::from_seekable(Cursor::new(data), &mut buf); + assert!(archive.is_err()); + } + + #[test] + pub fn trunc_comment_zips() { + let data = [ + 80, 75, 6, 7, 21, 0, 0, 0, 34, 0, 0, 0, 0, 0, 0, 0, 10, 0, 59, 59, 80, 75, 5, 6, 0, + 255, 255, 255, 255, 255, 255, 0, 0, 0, 80, 75, 6, 6, 0, 0, 0, 10, + ]; + let mut buf = vec![0u8; RECOMMENDED_BUFFER_SIZE]; + let archive = ZipArchive::from_seekable(Cursor::new(data), &mut buf); + assert!(archive.is_err()); + + let archive = 
ZipArchive::from_slice(data); + assert!(archive.is_err()); + } + + #[test] + pub fn trunc_eocd64() { + let data = [ + 80, 75, 6, 7, 21, 0, 0, 0, 34, 0, 0, 0, 0, 0, 0, 0, 10, 0, 59, 59, 80, 75, 5, 6, 0, + 255, 255, 255, 255, 255, 255, 0, 0, 0, 80, 75, 6, 6, 0, 0, 6, 0, 0, 250, 255, 255, 255, + 255, 251, 0, 0, 0, 0, 80, 5, 6, 0, 0, 0, 0, 56, 0, 0, 0, 0, 10, + ]; + + let archive = ZipArchive::from_slice(data); + assert!(archive.is_err()); + + let mut buf = vec![0u8; RECOMMENDED_BUFFER_SIZE]; + let archive = ZipArchive::from_seekable(Cursor::new(data), &mut buf); + assert!(archive.is_err()); + } + + #[test] + pub fn trunc_eocd_entry() { + let data = [ + 80, 75, 1, 2, 159, 159, 159, 159, 159, 159, 159, 159, 159, 0, 241, 205, 0, 80, 75, 5, + 6, 0, 48, 249, 0, 250, 255, 255, 255, 255, 251, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 35, 0, + ]; + + let archive = ZipArchive::from_slice(data).unwrap(); + let mut entries = archive.entries(); + assert!(entries.next_entry().is_err()); + + let mut buf = vec![0u8; RECOMMENDED_BUFFER_SIZE]; + let archive = ZipArchive::from_seekable(Cursor::new(data), &mut buf).unwrap(); + let mut entries = archive.entries(&mut buf); + assert!(entries.next_entry().is_err()); + } + + #[test] + fn test_compressed_data_range() { + let test_zip = std::fs::read("assets/test.zip").unwrap(); + + // Test ZipSliceEntry API (from slice) + let slice_archive = ZipArchive::from_slice(&test_zip).unwrap(); + let slice_header_records: Vec<_> = slice_archive + .entries() + .collect::, _>>() + .unwrap(); + assert_eq!(slice_header_records.len(), 2); + + let entry1_wayfinder = slice_header_records[0].wayfinder(); + let slice_entry1 = slice_archive.get_entry(entry1_wayfinder).unwrap(); + let slice_range1 = slice_entry1.compressed_data_range(); + assert_eq!( + slice_range1, + (66, 91), + "test.txt compressed data should be at bytes 66-91" + ); + + let entry2_wayfinder = slice_header_records[1].wayfinder(); + let slice_entry2 = 
slice_archive.get_entry(entry2_wayfinder).unwrap(); + let slice_range2 = slice_entry2.compressed_data_range(); + assert_eq!( + slice_range2, + (169, 954), + "gophercolor16x16.png compressed data should be at bytes 169-954" + ); + + // Test ZipEntry API + let file = std::fs::File::open("assets/test.zip").unwrap(); + let mut buffer = vec![0u8; RECOMMENDED_BUFFER_SIZE]; + let reader_archive = ZipArchive::from_file(file, &mut buffer).unwrap(); + + // Get wayfinders from the slice archive since they should be identical + let reader_entry1 = reader_archive.get_entry(entry1_wayfinder).unwrap(); + let reader_range1 = reader_entry1.compressed_data_range(); + + let reader_entry2 = reader_archive.get_entry(entry2_wayfinder).unwrap(); + let reader_range2 = reader_entry2.compressed_data_range(); + + // Verify both APIs return identical ranges + assert_eq!(slice_range1, reader_range1); + assert_eq!(slice_range2, reader_range2); + } +} diff --git a/crates/soapberry-zip/src/crc.rs b/crates/soapberry-zip/src/crc.rs new file mode 100644 index 0000000..6b326c7 --- /dev/null +++ b/crates/soapberry-zip/src/crc.rs @@ -0,0 +1,36 @@ +/// Compute the CRC32 (IEEE) of a byte slice. +/// +/// Uses `crc32fast` which provides hardware-accelerated CRC32 using +/// SIMD/PCLMULQDQ instructions when available, falling back to a fast +/// software implementation otherwise. +#[inline] +pub fn crc32(data: &[u8]) -> u32 { + crc32fast::hash(data) +} + +/// Compute CRC32 incrementally, combining with a previous CRC value. +/// +/// This is useful for streaming CRC32 computation where data arrives in chunks. 
+#[inline] +pub fn crc32_chunk(data: &[u8], prev: u32) -> u32 { + let mut hasher = crc32fast::Hasher::new_with_initial(prev); + hasher.update(data); + hasher.finalize() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_crc() { + // Test known CRC32 value + let data = b"EU4txt\nchecksum=\"ced5411e2d4a5ec724595c2c4f1b7347\""; + assert_eq!(crc32(data), 1702863696); + + // Test incremental CRC32 + let full = crc32(b"hello world"); + let incremental = crc32_chunk(b" world", crc32(b"hello")); + assert_eq!(full, incremental); + } +} diff --git a/crates/soapberry-zip/src/errors.rs b/crates/soapberry-zip/src/errors.rs new file mode 100644 index 0000000..4277126 --- /dev/null +++ b/crates/soapberry-zip/src/errors.rs @@ -0,0 +1,171 @@ +/// An error that occurred while reading or writing a zip file +#[derive(Debug)] +pub struct Error { + inner: Box, +} + +impl Error { + /// Returns the offset of the end of central directory (EOCD) signature + /// + /// Useful for reparsing input that contains a false EOCD signature. 
+ pub fn eocd_offset(&self) -> Option { + self.inner.eocd_offset + } + + /// Records the offset of the (possibly false) EOCD signature on this error + pub(crate) fn with_eocd_offset(mut self, offset: u64) -> Self { + self.inner.eocd_offset = Some(offset); + self + } +} + +impl Error { + pub(crate) fn io(err: std::io::Error) -> Error { + Error::from(ErrorKind::IO(err)) + } + + pub(crate) fn utf8(err: std::str::Utf8Error) -> Error { + Error::from(ErrorKind::InvalidUtf8(err)) + } + + pub(crate) fn is_eof(&self) -> bool { + matches!(self.inner.kind, ErrorKind::Eof) + } + + /// The kind of error that occurred + pub fn kind(&self) -> &ErrorKind { + &self.inner.kind + } +} + +#[derive(Debug)] +struct ErrorInner { + kind: ErrorKind, + eocd_offset: Option, +} + +/// The kind of error that occurred +#[derive(Debug)] +#[non_exhaustive] +pub enum ErrorKind { + /// Missing end of central directory + MissingEndOfCentralDirectory, + + /// Missing zip64 end of central directory + MissingZip64EndOfCentralDirectory, + + /// Buffer size too small + BufferTooSmall, + + /// A record signature did not match the expected value + InvalidSignature { expected: u32, actual: u32 }, + + /// Invalid inflated file crc checksum + InvalidChecksum { expected: u32, actual: u32 }, + + /// An unexpected inflated file size + InvalidSize { expected: u64, actual: u64 }, + + /// Invalid UTF-8 sequence + InvalidUtf8(std::str::Utf8Error), + + /// An invalid input error with associated message + InvalidInput { msg: String }, + + /// Could not construct an archive with the given end of central directory + InvalidEndOfCentralDirectory, + + /// An IO error + IO(std::io::Error), + + /// An IO error (alias for compatibility) + Io(std::io::Error), + + /// An unexpected end of file + Eof, + + /// File not found in archive + FileNotFound(String), + + /// Unsupported compression method + UnsupportedCompressionMethod(u16), +} + +impl std::error::Error for Error {} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter) -> 
std::fmt::Result { + write!(f, "{}", self.inner.kind)?; + Ok(()) + } +} + +impl std::fmt::Display for ErrorKind { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match *self { + ErrorKind::IO(ref err) => err.fmt(f), + ErrorKind::MissingEndOfCentralDirectory => { + write!(f, "Missing end of central directory") + }, + ErrorKind::MissingZip64EndOfCentralDirectory => { + write!(f, "Missing zip64 end of central directory") + }, + ErrorKind::BufferTooSmall => { + write!(f, "Buffer size too small") + }, + ErrorKind::Eof => { + write!(f, "Unexpected end of file") + }, + ErrorKind::InvalidSignature { expected, actual } => { + write!( + f, + "Invalid signature: expected 0x{:08x}, got 0x{:08x}", + expected, actual + ) + }, + ErrorKind::InvalidChecksum { expected, actual } => { + write!( + f, + "Invalid checksum: expected 0x{:08x}, got 0x{:08x}", + expected, actual + ) + }, + ErrorKind::InvalidSize { expected, actual } => { + write!(f, "Invalid size: expected {}, got {}", expected, actual) + }, + ErrorKind::InvalidUtf8(ref err) => { + write!(f, "Invalid UTF-8: {}", err) + }, + ErrorKind::InvalidInput { ref msg } => { + write!(f, "Invalid input: {}", msg) + }, + ErrorKind::InvalidEndOfCentralDirectory => { + write!(f, "Invalid end of central directory") + }, + ErrorKind::Io(ref err) => err.fmt(f), + ErrorKind::FileNotFound(ref name) => { + write!(f, "File not found in archive: {}", name) + }, + ErrorKind::UnsupportedCompressionMethod(method) => { + write!(f, "Unsupported compression method: {}", method) + }, + } + } +} + +impl From for Error { + fn from(kind: ErrorKind) -> Error { + Error { + inner: Box::new(ErrorInner { + kind, + eocd_offset: None, + }), + } + } +} + +impl From for Error { + fn from(err: std::io::Error) -> Error { + Error::from(ErrorKind::IO(err)) + } +} diff --git a/crates/soapberry-zip/src/extra_fields.rs b/crates/soapberry-zip/src/extra_fields.rs new file mode 100644 index 0000000..28dc885 --- /dev/null +++ 
b/crates/soapberry-zip/src/extra_fields.rs @@ -0,0 +1,454 @@ +use crate::{Error, ErrorKind, Header, utils::le_u16}; +use std::io::Write; + +/// A numeric identifier for an extra field in a Zip archive. +/// +/// Constants defined here correspond to the IDs defined in the Zip specification. +/// +/// See sections 4.5 and 4.6 of the Zip spec. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct ExtraFieldId(u16); + +impl ExtraFieldId { + pub const ZIP64: Self = Self(0x0001); + pub const AV_INFO: Self = Self(0x0007); + pub const EXTENDED_LANGUAGE_ENCODING: Self = Self(0x0008); + pub const OS2: Self = Self(0x0009); + pub const NTFS: Self = Self(0x000a); + pub const OPENVMS: Self = Self(0x000c); + pub const UNIX: Self = Self(0x000d); + pub const FILE_STREAM_AND_FORK_DESCRIPTORS: Self = Self(0x000e); + pub const PATCH_DESCRIPTOR: Self = Self(0x000f); + pub const PKCS7_STORE: Self = Self(0x0014); + pub const X509_CERT_ID_AND_SIG: Self = Self(0x0015); + pub const X509_CERT_ID_CENTRAL_DIR: Self = Self(0x0016); + pub const STRONG_ENCRYPTION_HEADER: Self = Self(0x0017); + pub const RECORD_MANAGEMENT_CONTROLS: Self = Self(0x0018); + pub const PKCS7_ENCRYPTION_RECIPIENT_CERT_LIST: Self = Self(0x0019); + pub const TIMESTAMP_RECORD: Self = Self(0x0020); + pub const POLICY_DECRYPTION_KEY_RECORD: Self = Self(0x0021); + pub const SMARTCRYPT_KEY_PROVIDER: Self = Self(0x0022); + pub const SMARTCRYPT_POLICY_KEY_DATA: Self = Self(0x0023); + pub const IBM_S390_AS400_UNCOMPRESSED: Self = Self(0x0065); + pub const IBM_S390_AS400_COMPRESSED: Self = Self(0x0066); + pub const POSZIP_4690: Self = Self(0x4690); + pub const EXTENDED_TIMESTAMP: Self = Self(0x5455); + pub const INFO_ZIP_UNIX_ORIGINAL: Self = Self(0x5855); + pub const INFO_ZIP_UNIX: Self = Self(0x7855); + pub const INFO_ZIP_UNIX_UID_GID: Self = Self(0x7875); + pub const JAVA_JAR: Self = Self(0xCAFE); + pub const ANDROID_ZIP_ALIGNMENT: Self = Self(0xD935); + pub const MACINTOSH: Self = Self(0x07c8); + pub const 
ACORN_SPARKFS: Self = Self(0x4341); + pub const WINDOWS_NT_SECURITY_DESCRIPTOR: Self = Self(0x4653); + pub const AOS_VS_ACL: Self = Self(0x5356); + pub const INFO_ZIP_UNICODE_COMMENT: Self = Self(0x6375); + pub const INFO_ZIP_UNICODE_PATH: Self = Self(0x7075); + pub const DATA_STREAM_ALIGNMENT: Self = Self(0xa11e); + pub const MICROSOFT_OPEN_PACKAGING_GROWTH_HINT: Self = Self(0xa220); + + /// Creates an extra field ID from its raw `u16` value. + #[inline] + pub const fn new(id: u16) -> Self { + Self(id) + } + + /// Returns the raw `u16` value of the extra field ID. + #[inline] + pub const fn as_u16(self) -> u16 { + self.0 + } +} + +/// An iterator over extra field entries in a Zip archive. +/// +/// This follows zip spec section 4.5, which defines extensible data fields: +/// +/// - Header ID - 2 bytes +/// - Data Size - 2 bytes +/// - Data - variable length +/// +/// If the iterator encounters malformed or truncated data, it will stop +/// yielding entries. You can check [`ExtraFields::remaining_bytes()`] after +/// iteration to detect if any data was left unparsed. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct ExtraFields<'a> { + data: &'a [u8], +} + +impl<'a> ExtraFields<'a> { + /// Creates a new iterator over the extra fields in the provided data slice. + #[inline] + pub fn new(data: &'a [u8]) -> Self { + Self { data } + } + + /// Returns the remaining unparsed bytes in the extra field data. 
+ #[inline] + pub fn remaining_bytes(&self) -> &'a [u8] { + self.data + } + + #[inline] + fn next_data(&mut self) -> Option<&'a [u8]> { + let scratch = self.data; + if scratch.len() < 4 { + return None; + } + + let size = le_u16(&scratch[2..4]) as usize; + let total_field_len = size + 4; + if scratch.len() < total_field_len { + return None; + } + + let (body, rest) = scratch.split_at(total_field_len); + + // Only advance once we have the entire entry + self.data = rest; + Some(body) + } +} + +impl<'a> Iterator for ExtraFields<'a> { + type Item = (ExtraFieldId, &'a [u8]); + + #[inline] + fn next(&mut self) -> Option { + let next_chunk = self.next_data()?; + let kind = le_u16(&next_chunk[0..2]); + let body = &next_chunk[4..]; + Some((ExtraFieldId(kind), body)) + } +} + +/// Container for extra fields with a shared data buffer and cached sizes. +#[derive(Debug, Clone)] +pub(crate) struct ExtraFieldsContainer { + entries: StackVec, + data_buffer: StackVec, + pub(crate) local_size: u16, + pub(crate) central_size: u16, +} + +impl ExtraFieldsContainer { + pub fn new() -> Self { + Self { + entries: StackVec::new(Header::new(0)), + data_buffer: StackVec::new(0u8), + local_size: 0, + central_size: 0, + } + } + + pub fn add_field( + &mut self, + id: ExtraFieldId, + data: &[u8], + location: Header, + ) -> Result<(), Error> { + let size_delta = 4 + data.len(); + let mut current_size = 0; + if location.includes_local() { + current_size = self.local_size; + } + if location.includes_central() { + current_size = std::cmp::max(self.central_size, current_size); + } + + if size_delta + (current_size as usize) > u16::MAX as usize { + return Err(Error::from(ErrorKind::InvalidInput { + msg: "extra field data too large".to_string(), + })); + } + + let mut buffer = [0u8; 4]; + buffer[0..2].copy_from_slice(&id.as_u16().to_le_bytes()); + buffer[2..4].copy_from_slice(&(data.len() as u16).to_le_bytes()); + self.data_buffer.extend_from_slice(&buffer); + self.data_buffer.extend_from_slice(data); 
+ if location.includes_local() { + self.local_size += size_delta as u16; + } + if location.includes_central() { + self.central_size += size_delta as u16; + } + + self.entries.push(location); + Ok(()) + } + + fn write_extra_fields_iter( + &self, + writer: &mut impl Write, + filter: Header, + ) -> Result<(), Error> { + let fields = self.data_buffer.as_slice(); + let mut extra_fields = ExtraFields::new(fields); + let entries = self.entries.as_slice(); + for entry in entries { + let extra_field = extra_fields.next_data().expect("Entry should have data"); + let write = entry.intersects(filter); + if write { + writer.write_all(extra_field)?; + } + } + Ok(()) + } + + #[inline] + pub fn write_extra_fields(&self, writer: &mut impl Write, filter: Header) -> Result<(), Error> { + if filter == Header::LOCAL && self.local_size == 0 { + // No local fields to write + Ok(()) + } else if filter == Header::CENTRAL && self.central_size == 0 { + // No central fields to write + Ok(()) + } else if self.local_size == 0 || self.central_size == 0 { + // If everything is one sided, we can dump everything + writer.write_all(self.data_buffer.as_slice())?; + Ok(()) + } else { + self.write_extra_fields_iter(writer, filter) + } + } +} + +/// A stack-first vector that avoids heap allocation for small amounts of data. +/// +/// A poor man's `smallvec` as we aren't able to store as many elements inline +/// (by one byte), but it's still an extremely effective no dependency, no +/// unsafe solution, as benchmarks showed a 33% throughput improvement when +/// writing out files with timestamps. 
+#[derive(Debug, Clone)] +pub(crate) enum StackVec +where + T: Copy + Clone, +{ + /// Inline storage for up to N elements + Small { data: [T; N], len: u8 }, + /// Heap storage for more elements + Large(Vec), +} + +impl StackVec +where + T: Copy + Clone, +{ + pub fn new(default_val: T) -> Self { + Self::Small { + data: [default_val; N], + len: 0, + } + } + + pub fn push(&mut self, item: T) { + match self { + Self::Small { data, len } => { + if (*len as usize) < N { + // Still fits in small storage + data[*len as usize] = item; + *len += 1; + } else { + // Need to promote to large storage + let mut vec = Vec::with_capacity(N + 1); + vec.extend_from_slice(&data[..N]); + vec.push(item); + *self = Self::Large(vec); + } + }, + Self::Large(vec) => { + vec.push(item); + }, + } + } + + pub fn as_slice(&self) -> &[T] { + match self { + Self::Small { data, len } => &data[..*len as usize], + Self::Large(vec) => vec.as_slice(), + } + } +} + +// Specialized methods for StackVec (byte buffers) +impl StackVec { + pub fn extend_from_slice(&mut self, slice: &[u8]) { + match self { + Self::Small { data, len } => { + let current_len = *len as usize; + let end = current_len + slice.len(); + if end <= N { + data[current_len..current_len + slice.len()].copy_from_slice(slice); + *len += slice.len() as u8; + } else { + // Need to promote to large buffer + let mut vec = Vec::with_capacity(current_len + slice.len()); + vec.extend_from_slice(&data[..current_len]); + vec.extend_from_slice(slice); + *self = Self::Large(vec); + } + }, + Self::Large(vec) => { + vec.extend_from_slice(slice); + }, + } + } +} + +#[derive(Debug)] +pub enum StackVecIter<'a, T, const N: usize> +where + T: Copy + Clone, +{ + Small { + data: &'a [T; N], + len: u8, + index: u8, + }, + Large(std::slice::Iter<'a, T>), +} + +impl<'a, T, const N: usize> Iterator for StackVecIter<'a, T, N> +where + T: Copy + Clone, +{ + type Item = &'a T; + + fn next(&mut self) -> Option { + match self { + Self::Small { data, len, index } => { 
+ if *index < *len { + let result = &data[*index as usize]; + *index += 1; + Some(result) + } else { + None + } + }, + Self::Large(iter) => iter.next(), + } + } +} + +#[cfg(test)] +mod tests { + use std::io::Cursor; + + use super::*; + + #[test] + fn test_partial_parsing_with_remaining_bytes() { + let data = [0x55, 0x54, 0x01, 0x00, 0xFF, 0x01, 0x00, 0x05]; + let mut iter = ExtraFields::new(&data); + assert_eq!(iter.remaining_bytes(), &data); + + let (id, body) = iter.next().unwrap(); + assert_eq!(id, ExtraFieldId::EXTENDED_TIMESTAMP); + assert_eq!(body, &[0xFF]); + + assert_eq!(iter.next(), None); + assert_eq!(iter.remaining_bytes(), &[0x01, 0x00, 0x05]); + } + + #[test] + fn test_unknown_field_id() { + let data = [0xFF, 0xFF, 0x02, 0x00, 0xDE, 0xAD]; + let mut iter = ExtraFields::new(&data); + + let (id, body) = iter.next().unwrap(); + assert_eq!(id, ExtraFieldId(0xFFFF)); + assert_eq!(body, &[0xDE, 0xAD]); + + assert_eq!(iter.next(), None); + } + + #[test] + fn test_stack_vec_u8_inline_operations() { + let mut buf = StackVec::::new(0); + assert_eq!(buf.as_slice(), &[]); + + buf.push(1); + assert_eq!(buf.as_slice(), &[1]); + + buf.extend_from_slice(&[2, 3]); + assert_eq!(buf.as_slice(), &[1, 2, 3]); + } + + #[test] + fn test_stack_vec_u8_promote_to_heap() { + let mut buf = StackVec::::new(0); + + // Fill inline capacity + buf.extend_from_slice(&[1, 2]); + assert_eq!(buf.as_slice(), &[1, 2]); + + // Force promotion to heap + buf.extend_from_slice(&[3, 4, 5]); + assert_eq!(buf.as_slice(), &[1, 2, 3, 4, 5]); + + buf.push(6); + assert_eq!(buf.as_slice(), &[1, 2, 3, 4, 5, 6]); + } + + #[test] + fn test_stack_vec_size_constraints() { + // Test that StackVec for bytes is same size as Vec + assert!( + std::mem::size_of::>() <= 24, + "StackVec should not exceed Vec size on 64 bits" + ); + } + + #[test] + fn test_stack_vec_clone() { + let mut buf = StackVec::::new(0); + buf.extend_from_slice(&[1, 2, 3]); // Force heap promotion + + let cloned = buf.clone(); + 
assert_eq!(buf.as_slice(), cloned.as_slice()); + } + + fn round_trip_extra_fields(fields: &[(ExtraFieldId, &[u8], Header)]) { + let mut container = ExtraFieldsContainer::new(); + + for (id, data, location) in fields { + container.add_field(*id, data, *location).unwrap(); + } + + for location in [Header::LOCAL, Header::CENTRAL] { + let mut cursor = Cursor::new(Vec::new()); + container.write_extra_fields(&mut cursor, location).unwrap(); + + let written_fields = fields + .iter() + .filter(|&&(_, _, loc)| loc == location) + .map(|&(id, data, _)| (id, data)) + .collect::>(); + let read_fields = ExtraFields::new(cursor.get_ref()).collect::>(); + + assert_eq!(written_fields, read_fields); + } + } + + #[test] + fn test_extra_fields() { + // Only local extra fields + round_trip_extra_fields(&[ + (ExtraFieldId::new(0), &[0u8; 16], Header::LOCAL), + (ExtraFieldId::new(1), &[1u8; 16], Header::LOCAL), + ]); + + // Only central extra fields + round_trip_extra_fields(&[ + (ExtraFieldId::new(0), &[0u8; 16], Header::CENTRAL), + (ExtraFieldId::new(1), &[1u8; 16], Header::CENTRAL), + ]); + + // Mixed extra fields where the local and central sizes are the same + round_trip_extra_fields(&[ + (ExtraFieldId::new(0), &[0u8; 16], Header::CENTRAL), + (ExtraFieldId::new(1), &[1u8; 16], Header::LOCAL), + ]); + } +} diff --git a/crates/soapberry-zip/src/headers.rs b/crates/soapberry-zip/src/headers.rs new file mode 100644 index 0000000..4bc62c6 --- /dev/null +++ b/crates/soapberry-zip/src/headers.rs @@ -0,0 +1,118 @@ +/// Specifies which ZIP headers to place data. +/// +/// The ZIP specification allows for different data in local file headers versus +/// central directory headers. This type provides control over where data is +/// placed. +/// +/// The default value is to place data in both header locations. 
+/// +/// For usage example, see +/// [`ZipFileBuilder::extra_field`](crate::ZipFileBuilder::extra_field) +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct Header(u8); + +impl Header { + /// Include data only in the local file header. + pub const LOCAL: Self = Self(0b01); + + /// Include data only in the central directory. + pub const CENTRAL: Self = Self(0b10); + + #[inline] + pub(crate) const fn new(value: u8) -> Self { + Self(value) + } + + #[inline] + pub(crate) const fn includes_local(self) -> bool { + self.0 & Self::LOCAL.0 != 0 + } + + #[inline] + pub(crate) const fn includes_central(self) -> bool { + self.0 & Self::CENTRAL.0 != 0 + } + + #[inline] + pub(crate) const fn intersects(self, other: Self) -> bool { + (self.0 & other.0) != 0 + } +} + +impl Default for Header { + fn default() -> Self { + Self(Self::LOCAL.0 | Self::CENTRAL.0) + } +} + +impl std::ops::BitOr for Header { + type Output = Self; + + #[inline] + fn bitor(self, rhs: Self) -> Self::Output { + Self(self.0 | rhs.0) + } +} + +impl std::ops::BitOrAssign for Header { + #[inline] + fn bitor_assign(&mut self, rhs: Self) { + self.0 |= rhs.0; + } +} + +impl std::ops::BitAnd for Header { + type Output = Self; + + #[inline] + fn bitand(self, rhs: Self) -> Self::Output { + Self(self.0 & rhs.0) + } +} + +impl std::ops::BitAndAssign for Header { + #[inline] + fn bitand_assign(&mut self, rhs: Self) { + self.0 &= rhs.0; + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_header_bitflags_behavior() { + // Test that default equals LOCAL | CENTRAL + assert_eq!(Header::LOCAL | Header::CENTRAL, Header::default()); + + // Test includes methods + assert!(Header::LOCAL.includes_local()); + assert!(!Header::LOCAL.includes_central()); + + assert!(!Header::CENTRAL.includes_local()); + assert!(Header::CENTRAL.includes_central()); + + assert!(Header::default().includes_local()); + assert!(Header::default().includes_central()); + + // Test bitwise operations + let mut header = 
Header::LOCAL; + header |= Header::CENTRAL; + assert_eq!(header, Header::default()); + + let intersection = Header::default() & Header::LOCAL; + assert_eq!(intersection, Header::LOCAL); + + // Test intersects method + assert!(Header::default().intersects(Header::LOCAL)); + assert!(Header::default().intersects(Header::CENTRAL)); + assert!(Header::LOCAL.intersects(Header::default())); + assert!(!Header::LOCAL.intersects(Header::CENTRAL)); + } + + #[test] + fn test_header_default() { + assert_eq!(Header::default(), Header::LOCAL | Header::CENTRAL); + } +} diff --git a/crates/soapberry-zip/src/lib.rs b/crates/soapberry-zip/src/lib.rs new file mode 100644 index 0000000..9265dc5 --- /dev/null +++ b/crates/soapberry-zip/src/lib.rs @@ -0,0 +1,48 @@ +//! High-performance ZIP archive library optimized for Office document formats. +//! +//! This crate provides efficient ZIP reading and writing specifically designed +//! for OOXML (.docx, .xlsx, .pptx), ODF (.odt, .ods, .odp), and iWork +//! (.pages, .numbers, .key) file formats. +//! +//! # Quick Start +//! +//! For most use cases, use the high-level [`office`] module: +//! +//! ```rust,no_run +//! use soapberry_zip::office::{ArchiveReader, StreamingArchiveWriter}; +//! +//! // Reading +//! let data = std::fs::read("document.docx")?; +//! let archive = ArchiveReader::new(&data)?; +//! let content = archive.read("word/document.xml")?; +//! +//! // Writing +//! let mut writer = StreamingArchiveWriter::new(); +//! writer.write_deflated("content.xml", b"")?; +//! let bytes = writer.finish_to_bytes()?; +//! # Ok::<(), Box>(()) +//! 
``` +#![deny(unsafe_code)] + +mod archive; +mod crc; +mod errors; +pub mod extra_fields; +mod headers; +mod locator; +mod mode; +pub mod office; +pub mod path; +mod reader_at; +pub mod time; +mod utils; +mod writer; + +pub use archive::*; +pub use crc::crc32; +pub use errors::{Error, ErrorKind}; +pub use headers::Header; +pub use locator::*; +pub use mode::EntryMode; +pub use reader_at::{FileReader, RangeReader, ReaderAt}; +pub use writer::*; diff --git a/crates/soapberry-zip/src/locator.rs b/crates/soapberry-zip/src/locator.rs new file mode 100644 index 0000000..23d176d --- /dev/null +++ b/crates/soapberry-zip/src/locator.rs @@ -0,0 +1,960 @@ +use crate::errors::{Error, ErrorKind}; +use crate::reader_at::{FileReader, ReaderAtExt}; +use crate::utils::{le_u16, le_u32, le_u64}; +use crate::{ + END_OF_CENTRAL_DIR_LOCATOR_SIGNATURE, ReaderAt, Zip64EndOfCentralDirectory, + Zip64EndOfCentralDirectoryRecord, ZipArchive, ZipFileHeaderFixed, ZipSliceArchive, +}; +use std::cell::RefCell; +use std::fs::File; +use std::io::Seek; +use std::num::NonZeroU64; + +const END_OF_CENTRAL_DIR_SIGNAUTRE: u32 = 0x06054b50; +pub(crate) const END_OF_CENTRAL_DIR_SIGNAUTRE_BYTES: [u8; 4] = + END_OF_CENTRAL_DIR_SIGNAUTRE.to_le_bytes(); + +// https://github.com/zlib-ng/minizip-ng/blob/55db144e03027b43263e5ebcb599bf0878ba58de/mz_zip.c#L78 +const END_OF_CENTRAL_DIR_MAX_OFFSET: u64 = 1 << 20; + +/// Locates the End of Central Directory (EOCD) record in a ZIP archive. +/// +/// The `ZipLocator` is responsible for finding the EOCD record, which is +/// crucial for reading the contents of a ZIP file. +/// +/// If the comment or trailing data contains a false EOCD signature, the zip +/// locator can fail to parse. 
One can reparse the data starting +/// from the false EOCD offset using the reported offset +/// [`Error::eocd_offset()`] +#[derive(Debug)] +pub struct ZipLocator { + max_search_space: u64, +} + +impl Default for ZipLocator { + fn default() -> Self { + Self::new() + } +} + +impl ZipLocator { + /// Creates a new `ZipLocator` with a default maximum search space of 1 MiB + pub fn new() -> Self { + ZipLocator { + max_search_space: END_OF_CENTRAL_DIR_MAX_OFFSET, + } + } + + /// Sets the maximum number of bytes to search for the EOCD signature. + /// + /// The search is performed backwards from the end of the data source. + /// + /// ```rust + /// use soapberry_zip::ZipLocator; + /// + /// let locator = ZipLocator::new().max_search_space(1024 * 64); // 64 KiB + /// ``` + pub fn max_search_space(mut self, max_search_space: u64) -> Self { + self.max_search_space = max_search_space; + self + } + + fn locate_in_byte_slice(&self, data: &[u8]) -> Result { + let location = find_end_of_central_dir_signature(data, self.max_search_space as usize) + .ok_or(ErrorKind::MissingEndOfCentralDirectory)?; + + let mut eocd = self + .locate_in_byte_slice_impl(data, location) + .map_err(|e| e.with_eocd_offset(location as u64))?; + + // Transparently verify that the self reported central directory points + // to a valid entry. If it is not a valid entry, we can attempt to + // correct offsets when there is undeclared prelude data by testing if + // the central directory directly precedes the end of central directory + // marker, which should hold true in the vast majority of cases. If both + // checks fail, defer returning an error until the user explicitly wants + // to iterate through the central directory. + let first_entry = data + .get(eocd.central_dir_offset as usize..) 
+ .filter(|d| ZipFileHeaderFixed::parse(d).is_ok()); + + match first_entry { + None if !eocd.is_zip64() => { + let cd_offset = eocd.eocd_offset.saturating_sub(eocd.central_dir_size); + + let first_entry = data + .get(cd_offset as usize..) + .filter(|d| ZipFileHeaderFixed::parse(d).is_ok()); + + if first_entry.is_some() { + eocd.base_offset = cd_offset.saturating_sub(eocd.central_dir_offset); + eocd.central_dir_offset = cd_offset; + } + + Ok(eocd) + }, + _ => Ok(eocd), + } + } + + fn locate_in_byte_slice_impl( + &self, + data: &[u8], + location: usize, + ) -> Result { + let eocd = EndOfCentralDirectoryRecordFixed::parse(&data[location..])?; + let is_zip64 = eocd.is_zip64(); + let eocd = EndOfCentralDirectoryRecord::from_parts(location as u64, eocd); + + // Validate comment is completely present in the slice + let comment_start = location + EndOfCentralDirectoryRecordFixed::SIZE; + let comment_len = eocd.comment_len as usize; + if comment_start + comment_len > data.len() { + return Err(Error::from(ErrorKind::Eof)); + } + + if !is_zip64 { + return EndOfCentralDirectory::create(eocd); + } + + let zip64l = + &data[location.saturating_sub(Zip64EndOfCentralDirectoryLocatorRecord::SIZE)..]; + let zip64_locator = Zip64EndOfCentralDirectoryLocatorRecord::parse(zip64l)?; + let zip64_eocd = &data[(zip64_locator.directory_offset as usize).min(data.len())..]; + let zip64_record = Zip64EndOfCentralDirectoryRecord::parse(zip64_eocd)?; + + let zip64 = + Zip64EndOfCentralDirectory::from_parts(zip64_locator.directory_offset, zip64_record); + EndOfCentralDirectory::create_zip64(eocd, zip64) + } + + /// Locates the EOCD record within a byte slice. + /// + /// On success, returns a `ZipSliceArchive` which allows reading the archive + /// directly from the slice. On failure, returns the original slice and an `Error`. 
+ /// + /// # Examples + /// + /// ```rust + /// use soapberry_zip::ZipLocator; + /// use std::fs; + /// use std::io::Read; + /// + /// # fn main() -> Result<(), Box> { + /// let mut file = fs::File::open("assets/readme.zip")?; + /// let mut data = Vec::new(); + /// file.read_to_end(&mut data)?; + /// + /// let locator = ZipLocator::new(); + /// match locator.locate_in_slice(&data) { + /// Ok(archive) => { + /// println!("Found EOCD in slice, archive has {} files.", archive.entries_hint()); + /// } + /// Err((_data, e)) => { + /// eprintln!("Failed to locate EOCD in slice: {:?}", e); + /// } + /// } + /// # Ok(()) + /// # } + /// ``` + pub fn locate_in_slice>( + &self, + data: T, + ) -> Result, (T, Error)> { + match self.locate_in_byte_slice(data.as_ref()) { + Ok(eocd) => Ok(ZipSliceArchive::new(data, eocd)), + Err(e) => Err((data, e)), + } + } + + /// Locates the EOCD record within a file. + /// + /// A mutable byte slice to use for reading data from the file. The buffer + /// should be large enough to hold the EOCD record and potentially parts of + /// the ZIP64 EOCD locator if present. A common size might be a few + /// kilobytes. + /// + /// On failure, returns the original file and an `Error`. 
+ /// + /// # Examples + /// + /// ```rust + /// use soapberry_zip::ZipLocator; + /// use std::fs::File; + /// + /// # fn main() -> Result<(), Box> { + /// let file = File::open("assets/readme.zip")?; + /// let mut buffer = vec![0; soapberry_zip::RECOMMENDED_BUFFER_SIZE]; + /// let locator = ZipLocator::new(); + /// + /// match locator.locate_in_file(file, &mut buffer) { + /// Ok(archive) => { + /// println!("Found EOCD in file, archive has {} files.", archive.entries_hint()); + /// } + /// Err((_file, e)) => { + /// eprintln!("Failed to locate EOCD in file: {:?}", e); + /// } + /// } + /// # Ok(()) + /// # } + /// ``` + pub fn locate_in_file( + &self, + file: std::fs::File, + buffer: &mut [u8], + ) -> Result, (File, Error)> { + let mut reader = FileReader::from(file); + let end_offset = match reader.seek(std::io::SeekFrom::End(0)) { + Ok(offset) => offset, + Err(e) => return Err((reader.into_inner(), Error::from(e))), + }; + self.locate_in_reader(reader, buffer, end_offset) + .map_err(|(fr, e)| (fr.into_inner(), e)) + } + + /// Locates the EOCD record in a reader, treating the specified end offset + /// as the starting point when searching backwards. + /// + /// This method is useful for several scenarios: + /// + /// - Zip archive is nowhere near the end of the reader + /// - Zip archives are concatenated + /// + /// For seekable readers, you can determine the end_offset by seeking to the + /// end of the stream. + /// + /// Note that the zip locator may request data passed the end offset in + /// order to read the entire end of the central directory record + comment. 
+ /// + /// # Examples + /// + /// ```rust + /// use soapberry_zip::{ZipLocator, FileReader}; + /// use std::fs::File; + /// use std::io::Seek; + /// + /// # fn main() -> Result<(), soapberry_zip::Error> { + /// let file = File::open("assets/test.zip").unwrap(); + /// let mut reader = FileReader::from(file); + /// let mut buffer = vec![0; soapberry_zip::RECOMMENDED_BUFFER_SIZE]; + /// let locator = ZipLocator::new(); + /// + /// // An example of determining the end offset when you don't know + /// // the length but have a seekable reader. + /// let end_offset = reader.seek(std::io::SeekFrom::End(0)).unwrap(); + /// let archive = locator.locate_in_reader(reader, &mut buffer, end_offset) + /// .map_err(|(_, e)| e)?; + /// + /// // Maybe there is another zip archive to be found. + /// // To find where the current archive starts, we need the minimum local header + /// // offset. Below we are being conservative and iterating through the entire central + /// // directory for the start offset, but in reality out of order central directories + /// // are an edge case. 
+ /// let zip_start = { + /// let mut min_offset = u64::MAX; + /// let mut entries = archive.entries(&mut buffer); + /// while let Ok(Some(entry)) = entries.next_entry() { + /// min_offset = min_offset.min(entry.local_header_offset()); + /// } + /// if min_offset == u64::MAX { 0 } else { min_offset } + /// }; + /// match locator.locate_in_reader(archive.get_ref(), &mut buffer, zip_start) { + /// Ok(previous_archive) => { + /// println!("Found previous ZIP archive!"); + /// } + /// Err((_, _)) => println!("No previous ZIP archive found"), + /// } + /// # Ok(()) + /// # } + /// ``` + pub fn locate_in_reader( + &self, + mut reader: R, + buffer: &mut [u8], + end_offset: u64, + ) -> Result, (R, Error)> + where + R: ReaderAt, + { + let location_result = + find_end_of_central_dir(&mut reader, buffer, self.max_search_space, end_offset); + + let (eocd_offset, buffer_pos, buffer_valid_len) = match location_result { + Ok(Some(location_tuple)) => location_tuple, + Ok(None) => { + return Err((reader, Error::from(ErrorKind::MissingEndOfCentralDirectory))); + }, + Err(error) => { + return Err((reader, Error::io(error))); + }, + }; + + let (reader, mut eocd) = self + .locate_in_reader_impl(reader, buffer, eocd_offset, buffer_pos, buffer_valid_len) + .map_err(|(reader, e)| (reader, e.with_eocd_offset(eocd_offset)))?; + + // Check first entry in central directory, see + // `ZipLocator::locate_in_byte_slice` for more info + let first_entry = reader + .read_exact_at( + &mut buffer[..ZipFileHeaderFixed::SIZE], + eocd.central_dir_offset, + ) + .ok() + .filter(|_| ZipFileHeaderFixed::parse(buffer).is_ok()); + + match first_entry { + None if !eocd.is_zip64() => { + let cd_offset = eocd.eocd_offset.saturating_sub(eocd.central_dir_size); + + let first_entry = reader + .read_exact_at(&mut buffer[..ZipFileHeaderFixed::SIZE], cd_offset) + .ok() + .filter(|_| ZipFileHeaderFixed::parse(buffer).is_ok()); + + if first_entry.is_some() { + eocd.base_offset = 
cd_offset.saturating_sub(eocd.central_dir_offset); + eocd.central_dir_offset = cd_offset; + } + + Ok(ZipArchive::new(reader, eocd)) + }, + _ => Ok(ZipArchive::new(reader, eocd)), + } + } + + fn locate_in_reader_impl( + &self, + reader: R, + buffer: &mut [u8], + eocd_offset: u64, + buffer_pos: usize, + buffer_valid_len: usize, + ) -> Result<(R, EndOfCentralDirectory), (R, Error)> + where + R: ReaderAt, + { + // Most likely the single read to find the end of the central directory + // will fill the buffer with entire end of the central directory (and + // optionally zip64 end of central directory). So let's try and reuse + // the the data already in memory as much as possible. + let reader = Marker::new(reader); + + let mut end_of_central_directory = &buffer[buffer_pos..buffer_valid_len]; + let eocd = loop { + match EndOfCentralDirectoryRecordFixed::parse(end_of_central_directory) { + Ok(record) => break record, + Err(e) if e.is_eof() => { + // Unhappy path: the end of central directory crossed over read boundaries + let read = reader.read_at_least_at( + buffer, + EndOfCentralDirectoryRecordFixed::SIZE, + eocd_offset, + ); + + let read = match read { + Ok(read) => read, + Err(e) => return Err((reader.inner, e)), + }; + + end_of_central_directory = &buffer[..read]; + }, + Err(e) => return Err((reader.inner, e)), + } + }; + + let is_zip64 = eocd.is_zip64(); + + end_of_central_directory = + &end_of_central_directory[EndOfCentralDirectoryRecordFixed::SIZE..]; + + let comment_len = eocd.comment_len as usize; + + // Check if the rest of the buffer doesn't completely contain the comment. 
+ if end_of_central_directory.len() < comment_len { + let pos = end_of_central_directory.len(); + let comment_offset = + eocd_offset + EndOfCentralDirectoryRecordFixed::SIZE as u64 + pos as u64; + let remaining_comment_len = comment_len - pos; + + // Try to read a single byte to validate the rest of the comment is accessible + let mut temp_buf = [0u8; 1]; + let end_comment_offset = comment_offset + remaining_comment_len as u64 - 1; + if let Err(e) = reader.read_exact_at(&mut temp_buf, end_comment_offset) { + return Err((reader.inner, Error::io(e))); + } + } + + let eocd = EndOfCentralDirectoryRecord::from_parts(eocd_offset, eocd); + if !is_zip64 { + return match EndOfCentralDirectory::create(eocd) { + Ok(eocd) => Ok((reader.inner, eocd)), + Err(e) => Err((reader.inner, e)), + }; + } + + let eocd64l_size = Zip64EndOfCentralDirectoryLocatorRecord::SIZE; + + // Unhappy path: if we needed to issue any reads since the original + // eocd or don't have enough data in the buffer + let eocd64l_pos = if reader.is_marked() || eocd64l_size > buffer_pos { + if (eocd64l_size as u64) > eocd_offset { + return Err(( + reader.inner, + Error::from(ErrorKind::MissingZip64EndOfCentralDirectory), + )); + } + + let read = reader.read_exact_at( + &mut buffer[..eocd64l_size], + eocd_offset - eocd64l_size as u64, + ); + + match read { + Ok(_) => 0, + Err(e) => return Err((reader.inner, Error::io(e))), + } + } else { + buffer_pos - eocd64l_size + }; + + let zip64l_eocd = &buffer[eocd64l_pos..eocd64l_pos + eocd64l_size]; + let zip64_locator = match Zip64EndOfCentralDirectoryLocatorRecord::parse(zip64l_eocd) { + Ok(locator) => locator, + Err(e) => return Err((reader.inner, e)), + }; + + let zip64_eocd_fixed_size = Zip64EndOfCentralDirectoryRecord::SIZE; + + // Unhappy path: zip64 eocd is not in the original buffer + let (eocd64_start, eocd64_end) = if reader.is_marked() + || zip64_locator.directory_offset > eocd_offset + || eocd_offset - zip64_locator.directory_offset > buffer_pos as u64 + { + 
let read = reader.try_read_at_least_at( + buffer, + zip64_eocd_fixed_size, + zip64_locator.directory_offset, + ); + + match read { + Ok(read) => (0, read), + Err(e) => { + return Err((reader.inner, Error::io(e))); + }, + } + } else { + ( + buffer_pos - (eocd_offset - zip64_locator.directory_offset) as usize, + buffer_valid_len, + ) + }; + + let zip64_eocd = &buffer[eocd64_start..eocd64_end]; + let zip64_record = match Zip64EndOfCentralDirectoryRecord::parse(zip64_eocd) { + Ok(record) => record, + Err(e) => return Err((reader.inner, e)), + }; + + // todo: zip64 extensible data sector + + let zip_eocd = + Zip64EndOfCentralDirectory::from_parts(zip64_locator.directory_offset, zip64_record); + match EndOfCentralDirectory::create_zip64(eocd, zip_eocd) { + Ok(eocd) => Ok((reader.inner, eocd)), + Err(e) => Err((reader.inner, e)), + } + } +} + +#[derive(Debug, Clone)] +pub(crate) struct EndOfCentralDirectory { + eocd_offset: u64, + zip64_eocd_offset: Option, + central_dir_size: u64, + central_dir_offset: u64, + num_entries: u64, + comment_len: u16, + base_offset: u64, +} + +impl EndOfCentralDirectory { + pub(crate) fn create(eocd: EndOfCentralDirectoryRecord) -> Result { + let result = EndOfCentralDirectory { + eocd_offset: eocd.offset, + zip64_eocd_offset: None, + central_dir_size: u64::from(eocd.central_dir_size), + central_dir_offset: u64::from(eocd.central_dir_offset), + num_entries: u64::from(eocd.num_entries), + comment_len: eocd.comment_len, + base_offset: 0, + }; + + result.validate()?; + Ok(result) + } + + pub(crate) fn create_zip64( + eocd: EndOfCentralDirectoryRecord, + zip64: Zip64EndOfCentralDirectory, + ) -> Result { + let result = EndOfCentralDirectory { + eocd_offset: eocd.offset, + zip64_eocd_offset: NonZeroU64::new(zip64.offset), + central_dir_size: zip64.central_dir_size, + central_dir_offset: zip64.central_dir_offset, + num_entries: zip64.num_entries, + comment_len: eocd.comment_len, + base_offset: 0, + }; + + result.validate()?; + Ok(result) + } + + fn 
validate(&self) -> Result<(), Error> { + // It doesn't make sense if the start of the central directory is after + // the end. + if self.directory_offset() > self.head_eocd_offset() { + return Err(Error::from(ErrorKind::InvalidEndOfCentralDirectory)); + } + + Ok(()) + } + + #[inline] + pub(crate) fn is_zip64(&self) -> bool { + self.zip64_eocd_offset.is_some() + } + + pub(crate) fn base_offset(&self) -> u64 { + self.base_offset + } + + /// The first end of the central directory signature offsets. + /// + /// This is offset where no new central directory records are expected. + /// + /// Will be equivalent to [`Self::tail_eocd_offset`] eocd for non-zip64 files + #[inline] + pub(crate) fn head_eocd_offset(&self) -> u64 { + self.zip64_eocd_offset + .map(|x| x.get()) + .unwrap_or(self.eocd_offset) + } + + /// The last end of the central directory signature offsets. + /// + /// This will always be the byte offset of 0x06054b50 + #[inline] + pub(crate) fn tail_eocd_offset(&self) -> u64 { + self.eocd_offset + } + + /// offset of the start of the central directory + #[inline] + pub(crate) fn directory_offset(&self) -> u64 { + self.central_dir_offset + } + + #[inline] + pub(crate) fn entries(&self) -> u64 { + self.num_entries + } + + #[inline] + pub(crate) fn comment_len(&self) -> usize { + self.comment_len as usize + } +} + +struct Marker { + inner: T, + marked: RefCell, +} + +impl Marker { + fn new(inner: T) -> Self { + Self { + inner, + marked: RefCell::new(false), + } + } + + fn is_marked(&self) -> bool { + *self.marked.borrow() + } +} + +impl ReaderAt for Marker +where + T: ReaderAt, +{ + fn read_at(&self, buf: &mut [u8], offset: u64) -> std::io::Result { + match self.inner.read_at(buf, offset) { + Ok(n) if n > 0 => { + *self.marked.borrow_mut() = true; + Ok(n) + }, + x => x, + } + } +} + +impl std::io::Seek for Marker +where + T: std::io::Seek, +{ + fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result { + self.inner.seek(pos) + } +} + +/// A non-zip64 end of 
central directory +#[derive(Debug, Clone)] +pub(crate) struct EndOfCentralDirectoryRecord { + pub(crate) offset: u64, + pub(crate) central_dir_size: u32, + pub(crate) central_dir_offset: u32, + pub(crate) num_entries: u16, + pub(crate) comment_len: u16, +} + +impl EndOfCentralDirectoryRecord { + #[inline] + pub fn from_parts(offset: u64, eocd: EndOfCentralDirectoryRecordFixed) -> Self { + Self { + offset, + central_dir_size: eocd.central_dir_size, + central_dir_offset: eocd.central_dir_offset, + num_entries: eocd.total_entries, + comment_len: eocd.comment_len, + } + } +} + +#[derive(Debug, Clone)] +pub(crate) struct EndOfCentralDirectoryRecordFixed { + pub(crate) signature: u32, + #[allow(dead_code)] + pub(crate) disk_number: u16, + #[allow(dead_code)] + pub(crate) eocd_disk: u16, + pub(crate) num_entries: u16, + pub(crate) total_entries: u16, + pub(crate) central_dir_size: u32, + pub(crate) central_dir_offset: u32, + pub(crate) comment_len: u16, +} + +impl EndOfCentralDirectoryRecordFixed { + pub(crate) const SIZE: usize = 22; + pub fn parse(data: &[u8]) -> Result { + if data.len() < Self::SIZE { + return Err(Error::from(ErrorKind::Eof)); + } + + let result = EndOfCentralDirectoryRecordFixed { + signature: le_u32(&data[0..4]), + disk_number: le_u16(&data[4..6]), + eocd_disk: le_u16(&data[6..8]), + num_entries: le_u16(&data[8..10]), + total_entries: le_u16(&data[10..12]), + central_dir_size: le_u32(&data[12..16]), + central_dir_offset: le_u32(&data[16..20]), + comment_len: le_u16(&data[20..22]), + }; + + if result.signature != END_OF_CENTRAL_DIR_SIGNAUTRE { + return Err(Error::from(ErrorKind::InvalidSignature { + expected: END_OF_CENTRAL_DIR_SIGNAUTRE, + actual: result.signature, + })); + } + + Ok(result) + } + + pub fn is_zip64(&self) -> bool { + // https://github.com/zlib-ng/minizip-ng/blob/55db144e03027b43263e5ebcb599bf0878ba58de/mz_zip.c#L1011 + self.num_entries == u16::MAX || // 4.4.22 + self.central_dir_offset == u32::MAX // 4.4.24 + } +} + +/// +/// +/// 
4.3.15 +#[derive(Debug)] +#[allow(dead_code)] +struct Zip64EndOfCentralDirectoryLocatorRecord { + /// zip64 end of central dir locator signature + pub signature: u32, + + /// number of the disk with the start of the zip64 end of central directory + pub eocd_disk: u32, + + /// relative offset of the zip64 end of central directory record + pub directory_offset: u64, + + /// total number of disks + pub total_disks: u32, +} + +impl Zip64EndOfCentralDirectoryLocatorRecord { + const SIZE: usize = 20; + + pub fn parse(data: &[u8]) -> Result { + if data.len() < Self::SIZE { + return Err(Error::from(ErrorKind::Eof)); + } + + let result = Zip64EndOfCentralDirectoryLocatorRecord { + signature: le_u32(&data[0..4]), + eocd_disk: le_u32(&data[4..8]), + directory_offset: le_u64(&data[8..16]), + total_disks: le_u32(&data[16..20]), + }; + + if result.signature != END_OF_CENTRAL_DIR_LOCATOR_SIGNATURE { + return Err(Error::from(ErrorKind::InvalidSignature { + expected: END_OF_CENTRAL_DIR_LOCATOR_SIGNATURE, + actual: result.signature, + })); + } + + Ok(result) + } +} + +pub(crate) fn find_end_of_central_dir_signature( + data: &[u8], + max_search_space: usize, +) -> Option { + let start_search = data.len().saturating_sub(max_search_space); + backwards_find( + &data[start_search..], + &END_OF_CENTRAL_DIR_SIGNAUTRE.to_le_bytes(), + ) + .map(|pos| pos + start_search) +} + +pub(crate) fn find_end_of_central_dir( + reader: T, + buffer: &mut [u8], + max_search_space: u64, + end_offset: u64, +) -> std::io::Result> +where + T: ReaderAt, +{ + if buffer.len() < END_OF_CENTRAL_DIR_SIGNAUTRE_BYTES.len() { + debug_assert!(false, "buffer not big enough to hold signature"); + return Ok(None); + } + + let max_back = end_offset.saturating_sub(max_search_space); + let mut offset = end_offset; + + // The amount of data the remains in the stream + let mut remaining = end_offset - max_back; + + // The number of bytes that were translated from the front to the back + let mut carry_over = 0; + loop { + // We 
either want to read into the entire buffer (sans the bytes that + // were carried over from the last read). Or we want to read the remainder + let read_size = (buffer.len() - carry_over).min(remaining as usize); + + // Need to jump back to the start of the previous read and then how much + // we want to read + offset -= read_size as u64; + + // reader.seek_relative(-offset)?; + reader.read_exact_at(&mut buffer[..read_size], offset)?; + remaining -= read_size as u64; + + let haystack = &buffer[..read_size + carry_over]; + if let Some(i) = backwards_find(haystack, &END_OF_CENTRAL_DIR_SIGNAUTRE_BYTES) { + let eocd_offset = (max_back + remaining) + (i as u64); + return Ok(Some((eocd_offset, i, read_size + carry_over))); + } + + if remaining == 0 { + return Ok(None); + } + + // Since the signature may be across read boundaries, match how much the + // end of the signature matches the start of the buffer + carry_over = match buffer { + [b0, b1, b2, ..] if [*b0, *b1, *b2] == END_OF_CENTRAL_DIR_SIGNAUTRE_BYTES[1..4] => 3, + [b0, b1, ..] if [*b0, *b1] == END_OF_CENTRAL_DIR_SIGNAUTRE_BYTES[2..4] => 2, + [b0, ..] 
if *b0 == END_OF_CENTRAL_DIR_SIGNAUTRE_BYTES[3] => 1, + _ => 0, + }; + + if carry_over > 0 { + // place the carry over bytes at the end of the buffer for the next read + let dest = (buffer.len() - carry_over).min(remaining as usize); + buffer.copy_within(..carry_over, dest); + } + } +} + +fn backwards_find(haystack: &[u8], needle: &[u8]) -> Option { + haystack + .windows(needle.len()) + .rposition(|window| window == needle) +} + +#[cfg(test)] +mod tests { + use super::*; + use quickcheck_macros::quickcheck; + use rstest::rstest; + use std::io::Cursor; + + #[quickcheck] + fn test_find_end_of_central_dir_signature(mut data: Vec, offset: usize, chunk_size: u16) { + if data.len() < 4 { + return; + } + + let max_search_space = END_OF_CENTRAL_DIR_MAX_OFFSET; + let pos = (offset % data.len()).saturating_sub(END_OF_CENTRAL_DIR_SIGNAUTRE_BYTES.len()); + data[pos..pos + 4].copy_from_slice(&END_OF_CENTRAL_DIR_SIGNAUTRE_BYTES); + + let result = find_end_of_central_dir_signature(&data, max_search_space as usize).unwrap(); + + let mut buffer = vec![0u8; chunk_size.max(4) as usize]; + let reader = std::io::Cursor::new(&data); + let (index, buffer_index, buffer_valid_len) = + find_end_of_central_dir(reader, &mut buffer, max_search_space, data.len() as u64) + .unwrap() + .unwrap(); + + assert_eq!(index, result as u64); + assert!(buffer_valid_len > 0, "buffer_valid_len should be positive"); + assert!( + buffer_valid_len <= buffer.len(), + "buffer_valid_len should not exceed buffer capacity" + ); + assert!( + buffer_index < buffer_valid_len, + "buffer_index should be within buffer_valid_len" + ); + assert!( + buffer_index + END_OF_CENTRAL_DIR_SIGNAUTRE_BYTES.len() <= buffer_valid_len, + "signature should be within valid part of buffer" + ); + assert_eq!( + buffer[buffer_index..buffer_index + 4], + END_OF_CENTRAL_DIR_SIGNAUTRE_BYTES + ); + } + + #[quickcheck] + fn test_find_end_of_central_dir_signature_random( + data: Vec, + chunk_size: u16, + max_search_space: u64, + ) { + let mem = 
find_end_of_central_dir_signature(&data, max_search_space as usize); + + let mut buffer = vec![0u8; chunk_size.max(4) as usize]; + let reader = std::io::Cursor::new(&data); + let curse = + find_end_of_central_dir(reader, &mut buffer, max_search_space, data.len() as u64) + .unwrap(); + + let mem_result = mem.map(|x| x as u64); + let curse_result = curse.map(|(a, _, _)| a); + assert_eq!(mem_result, curse_result); + + if let Some((_, buffer_index, buffer_valid_len)) = curse { + assert!(buffer_valid_len > 0, "buffer_valid_len should be positive"); + assert!( + buffer_valid_len <= buffer.len(), + "buffer_valid_len should not exceed buffer capacity" + ); + assert!( + buffer_index < buffer_valid_len, + "buffer_index should be within buffer_valid_len" + ); + assert!( + buffer_index + END_OF_CENTRAL_DIR_SIGNAUTRE_BYTES.len() <= buffer_valid_len, + "signature should be within valid part of buffer" + ); + } + } + + #[rstest] + #[case(&[], 4, 1000, None)] + #[case(&[6], 4, 1000, None)] + #[case(&[5, 6], 4, 1000, None)] + #[case(&[b'K', 5, 6], 4, 1000, None)] + #[case(&[0, 6, 0, 0, 0], 4, 1000, None)] + #[case(&[b'P', b'K', 5, 6], 4, 1000, Some(0))] + #[case(&[b'P', b'K', 5, 6], 5, 1000, Some(0))] + #[case(&[b'P', b'K', 5, 6, 5, 6], 5, 1000, Some(0))] + #[case(&[b'P', b'K', 5, 6, 6, 0, 0, 0], 4, 1000, Some(0))] + #[case(&[b'P', b'K', 5, 6, 0, 0, 0, 0], 4, 1000, Some(0))] + #[case(&[b'P', b'K', 5, 6, 0, 0, 0], 4, 1000, Some(0))] + #[case(&[b'P', b'K', 5, 6, 0], 4, 1000, Some(0))] + #[case(&[5, 6, b'P', b'K', 5, 6], 4, 1000, Some(2))] + #[case(&[5, 6, b'P', b'K', 5, 6], 5, 1000, Some(2))] + #[case(&[5, 6, b'P', b'K', 5, 6, 5, 6], 4, 1000, Some(2))] + #[case(&[5, 6, b'P', b'K', 5, 6, 5, 6], 5, 1000, Some(2))] + #[case(&[b'P', b'K', 5, 6, b'P', b'K', 5, 6, 5, 6], 5, 1000, Some(4))] + #[case(&[b'P', b'K', 5, 6, b'P', b'K', 5, 6, 5, 6], 32, 1000, Some(4))] + #[case(&[b'P', b'K', 5, 6], 5, 4, Some(0))] // start of max search space tests + #[case(&[b'P', b'K', 5, 6, 5, 6], 5, 5, None)] 
+ #[case(&[b'P', b'K', 5, 6, 6, 0, 0, 0], 4, 8, Some(0))] + #[case(&[b'P', b'K', 5, 6, 0, 0, 0], 4, 8, Some(0))] + #[case(&[b'P', b'K', 5, 6, 0], 4, 4, None)] + #[case(&[5, 6, b'P', b'K', 5, 6], 4, 4, Some(2))] + #[case(&[5, 6, b'P', b'K', 5, 6], 5, 4, Some(2))] + #[case(&[5, 6, b'P', b'K', 5, 6, 5, 6], 4, 4, None)] + #[case(&[5, 6, b'P', b'K', 5, 6, 5, 6], 5, 4, None)] + #[case(&[b'P', b'K', 5, 6, b'P', b'K', 5, 6, 5, 6], 5, 6, Some(4))] + #[case(&[b'P', b'K', 5, 6, b'P', b'K', 5, 6, 5, 6], 32, 10, Some(4))] + #[test] + fn test_find_end_of_central_dir_signature_cases( + #[case] input: &[u8], + #[case] buffer_size: usize, + #[case] max_search_space: u64, + #[case] expected: Option, + ) { + let result = find_end_of_central_dir_signature(input, max_search_space as usize); + assert_eq!(result.map(|x| x as u64), expected); + + let cursor = Cursor::new(&input); + let mut buffer = vec![0u8; buffer_size]; + let found = + find_end_of_central_dir(cursor, &mut buffer, max_search_space, input.len() as u64) + .unwrap(); + let found_result = found.map(|(a, _, _)| a); + assert_eq!(found_result, expected); + + if expected.is_some() { + let (_, buffer_pos, buffer_valid_len) = found.unwrap(); + assert!(buffer_valid_len > 0, "buffer_valid_len should be positive"); + assert!( + buffer_valid_len <= buffer_size, + "buffer_valid_len should not exceed buffer capacity" + ); + assert!( + buffer_pos < buffer_valid_len, + "buffer_index should be within buffer_valid_len" + ); + assert!( + buffer_pos + END_OF_CENTRAL_DIR_SIGNAUTRE_BYTES.len() <= buffer_valid_len, + "signature should be within valid part of buffer" + ); + assert_eq!( + buffer[buffer_pos..buffer_pos + 4], + END_OF_CENTRAL_DIR_SIGNAUTRE_BYTES + ); + } + } +} diff --git a/crates/soapberry-zip/src/mode.rs b/crates/soapberry-zip/src/mode.rs new file mode 100644 index 0000000..f11b62b --- /dev/null +++ b/crates/soapberry-zip/src/mode.rs @@ -0,0 +1,95 @@ +/// ZIP creator system constants used in version_made_by field +pub(crate) const 
CREATOR_UNIX: u16 = 3; +pub(crate) const CREATOR_MACOS: u16 = 19; +pub(crate) const CREATOR_NTFS: u16 = 11; +pub(crate) const CREATOR_VFAT: u16 = 14; +pub(crate) const CREATOR_FAT: u16 = 0; + +/// File mode information for a given zip file entry. +/// +/// This represents Unix-style file permissions and type information. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct EntryMode(u32); + +impl EntryMode { + /// Creates a new Mode from a raw mode value. + #[must_use] + pub(crate) const fn new(value: u32) -> Self { + Self(value) + } + + /// Returns the raw mode value + #[must_use] + pub const fn value(&self) -> u32 { + self.0 + } + + /// Returns true if this is a symbolic link. + #[must_use] + pub const fn is_symlink(&self) -> bool { + self.0 & S_IFMT == S_IFLNK + } + + /// Returns the Unix permission bits (e.g., 0o755). + #[must_use] + pub const fn permissions(&self) -> u32 { + self.0 & 0o777 + } +} + +/// Unix file type and permission constants +const S_IFMT: u32 = 0o170000; // File type mask +const S_IFSOCK: u32 = 0o140000; // Socket +const S_IFLNK: u32 = 0o120000; // Symbolic link +const S_IFREG: u32 = 0o100000; // Regular file +const S_IFBLK: u32 = 0o060000; // Block device +const S_IFDIR: u32 = 0o040000; // Directory +const S_IFCHR: u32 = 0o020000; // Character device +const S_IFIFO: u32 = 0o010000; // FIFO +const S_ISUID: u32 = 0o004000; // Set user ID +const S_ISGID: u32 = 0o002000; // Set group ID +const S_ISVTX: u32 = 0o001000; // Sticky bit + +/// MSDOS file attribute constants +const MSDOS_DIR: u32 = 0x10; +const MSDOS_READONLY: u32 = 0x01; + +/// Converts Unix mode to file mode +pub(crate) fn unix_mode_to_file_mode(m: u32) -> u32 { + let mut mode = m & 0o777; // Basic permissions + + // Set file type bits based on Unix mode + match m & S_IFMT { + S_IFBLK => mode |= S_IFBLK, + S_IFCHR => mode |= S_IFCHR, + S_IFDIR => mode |= S_IFDIR, + S_IFIFO => mode |= S_IFIFO, + S_IFLNK => mode |= S_IFLNK, + S_IFSOCK => mode |= S_IFSOCK, + _ => mode |= S_IFREG, 
// Default to regular file + } + + // Set special permission bits + if m & S_ISGID != 0 { + mode |= S_ISGID; + } + if m & S_ISUID != 0 { + mode |= S_ISUID; + } + if m & S_ISVTX != 0 { + mode |= S_ISVTX; + } + + mode +} + +/// Converts MSDOS attributes to file mode, following Go's zip reader logic +pub(crate) fn msdos_mode_to_file_mode(m: u32) -> u32 { + if m & MSDOS_DIR != 0 { + S_IFDIR | 0o777 + } else if m & MSDOS_READONLY != 0 { + S_IFREG | 0o444 + } else { + S_IFREG | 0o666 + } +} diff --git a/crates/soapberry-zip/src/office.rs b/crates/soapberry-zip/src/office.rs new file mode 100644 index 0000000..b9ddb42 --- /dev/null +++ b/crates/soapberry-zip/src/office.rs @@ -0,0 +1,641 @@ +//! High-level ZIP archive API optimized for Office document formats. +//! +//! This module provides a simplified interface for reading and writing ZIP archives, +//! specifically optimized for OOXML, ODF, and iWork file formats that use Deflate +//! compression exclusively. +//! +//! # Reading Archives +//! +//! ```rust,no_run +//! use soapberry_zip::office::ArchiveReader; +//! +//! let data = std::fs::read("document.docx")?; +//! let archive = ArchiveReader::new(&data)?; +//! +//! // Read a specific file +//! let content = archive.read("word/document.xml")?; +//! +//! // Iterate over all files +//! for name in archive.file_names() { +//! println!("{}", name); +//! } +//! # Ok::<(), Box>(()) +//! ``` +//! +//! # Writing Archives +//! +//! ```rust,no_run +//! use soapberry_zip::office::ArchiveWriter; +//! +//! let mut writer = ArchiveWriter::new(); +//! writer.write_stored("mimetype", b"application/vnd.oasis.opendocument.text")?; +//! writer.write_deflated("content.xml", b"...")?; +//! let bytes = writer.finish()?; +//! # Ok::<(), Box>(()) +//! 
``` + +use crate::{ + CompressionMethod, Error, ErrorKind, ZipArchive, ZipArchiveWriter, ZipSliceArchive, + ZipVerification, +}; +use flate2::Compression; +use flate2::read::DeflateDecoder; +use flate2::write::DeflateEncoder; +use std::collections::HashMap; +use std::io::{Read, Write}; + +/// High-performance ZIP archive reader for Office document formats. +/// +/// Provides a simple API for reading ZIP archives with automatic decompression. +/// Optimized for OOXML (.docx, .xlsx, .pptx), ODF (.odt, .ods, .odp), and +/// iWork (.pages, .numbers, .key) formats. +/// +/// # Performance +/// +/// - Zero-copy parsing of archive structure +/// - Lazy decompression - only decompress files when accessed +/// - Pre-indexed file lookup for O(1) access by name +pub struct ArchiveReader<'data> { + archive: ZipSliceArchive<&'data [u8]>, + /// Pre-built index for fast file lookup by name + index: HashMap, +} + +/// Information about an archive entry for fast lookup +#[derive(Debug, Clone)] +struct EntryInfo { + wayfinder: crate::ZipArchiveEntryWayfinder, + compression_method: CompressionMethod, + uncompressed_size: u64, +} + +impl<'data> ArchiveReader<'data> { + /// Create a new archive reader from a byte slice. + /// + /// This parses the ZIP central directory and builds an index for fast + /// file lookup. The actual file contents are not decompressed until + /// accessed via `read()`. 
+ pub fn new(data: &'data [u8]) -> Result { + let archive = ZipArchive::from_slice(data)?; + + // Build index for fast lookup + let mut index = HashMap::new(); + for entry_result in archive.entries() { + let entry = entry_result?; + let path = entry.file_path(); + + // Normalize path - convert to string, skip directories + if entry.is_dir() { + continue; + } + + let name = match path.try_normalize() { + Ok(normalized) => normalized.as_ref().to_string(), + Err(_) => { + // Fallback to raw path as lossy UTF-8 + String::from_utf8_lossy(path.as_ref()).to_string() + }, + }; + + index.insert( + name, + EntryInfo { + wayfinder: entry.wayfinder(), + compression_method: entry.compression_method(), + uncompressed_size: entry.uncompressed_size_hint(), + }, + ); + } + + Ok(Self { archive, index }) + } + + /// Get the number of files in the archive (excluding directories). + #[inline] + pub fn len(&self) -> usize { + self.index.len() + } + + /// Check if the archive is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.index.is_empty() + } + + /// Check if a file exists in the archive. + #[inline] + pub fn contains(&self, name: &str) -> bool { + // Try exact match first + if self.index.contains_key(name) { + return true; + } + // Try without leading slash + let normalized = name.strip_prefix('/').unwrap_or(name); + self.index.contains_key(normalized) + } + + /// Get an iterator over all file names in the archive. + pub fn file_names(&self) -> impl Iterator { + self.index.keys().map(|s| s.as_str()) + } + + /// Read and decompress a file from the archive. + /// + /// Returns the decompressed contents of the file. Supports both stored + /// (uncompressed) and deflated entries. 
+ pub fn read(&self, name: &str) -> Result, Error> { + // Normalize name - remove leading slash if present + let normalized = name.strip_prefix('/').unwrap_or(name); + + let info = self + .index + .get(normalized) + .ok_or_else(|| Error::from(ErrorKind::FileNotFound(normalized.to_string())))?; + + let entry = self.archive.get_entry(info.wayfinder)?; + let data = entry.data(); + + match info.compression_method { + CompressionMethod::Store => { + // Stored (uncompressed) - verify and return directly + let verifier = entry.claim_verifier(); + verifier.valid(ZipVerification { + crc: crate::crc32(data), + uncompressed_size: data.len() as u64, + })?; + Ok(data.to_vec()) + }, + CompressionMethod::Deflate => { + // Deflate - decompress with pre-allocated buffer + // Using unsafe to avoid costly buffer zeroing from read_to_end + let size = info.uncompressed_size as usize; + let mut decompressed = Vec::with_capacity(size); + + // SAFETY: We set the length to the expected uncompressed size. + // The decompression will write exactly `size` bytes (verified by CRC32). + // Any unwritten bytes at the end are truncated after reading. + #[allow(unsafe_code, clippy::uninit_vec)] + unsafe { + decompressed.set_len(size); + } + + let mut decoder = entry.verifying_reader(DeflateDecoder::new(data)); + let mut total_read = 0; + while total_read < size { + match decoder.read(&mut decompressed[total_read..]) { + Ok(0) => break, + Ok(n) => total_read += n, + Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue, + Err(e) => return Err(e.into()), + } + } + + // Truncate to actual bytes read (handles size mismatch gracefully) + decompressed.truncate(total_read); + Ok(decompressed) + }, + other => Err(Error::from(ErrorKind::UnsupportedCompressionMethod( + other.as_id().as_u16(), + ))), + } + } + + /// Read a file as a UTF-8 string. + /// + /// Convenience method that reads and decodes the file as UTF-8. 
+ pub fn read_string(&self, name: &str) -> Result { + let bytes = self.read(name)?; + String::from_utf8(bytes).map_err(|e| { + Error::from(ErrorKind::Io(std::io::Error::new( + std::io::ErrorKind::InvalidData, + e, + ))) + }) + } + + /// Read and decompress multiple files in parallel. + /// + /// This uses rayon for parallel decompression, providing significant speedup + /// when reading many compressed files (typical for OOXML/ODF documents). + /// + /// Returns a vector of (name, result) pairs in the same order as input. + /// Each result is either the decompressed bytes or an error. + /// + /// # Example + /// ```rust,no_run + /// use soapberry_zip::office::ArchiveReader; + /// + /// let data = std::fs::read("document.docx")?; + /// let archive = ArchiveReader::new(&data)?; + /// + /// let files = vec!["word/document.xml", "word/styles.xml"]; + /// let results = archive.read_many_parallel(&files); + /// + /// for (name, result) in results { + /// match result { + /// Ok(bytes) => println!("{}: {} bytes", name, bytes.len()), + /// Err(e) => eprintln!("{}: error: {}", name, e), + /// } + /// } + /// # Ok::<(), Box>(()) + /// ``` + pub fn read_many_parallel<'a, S: AsRef + Sync>( + &self, + names: &'a [S], + ) -> Vec<(&'a S, Result, Error>)> { + use rayon::prelude::*; + + names + .par_iter() + .map(|name| (name, self.read(name.as_ref()))) + .collect() + } + + /// Read all files from the archive in parallel. + /// + /// Returns a HashMap mapping file names to their decompressed contents. + /// Files that fail to decompress are skipped (not included in result). + /// + /// This is optimal when you need to access most/all files in the archive. 
+ pub fn read_all_parallel(&self) -> HashMap> { + use rayon::prelude::*; + + // Collect keys to Vec first for proper parallel iteration + // par_bridge() doesn't parallelize HashMap iteration effectively + let keys: Vec<&String> = self.index.keys().collect(); + + keys.into_par_iter() + .filter_map(|name| self.read(name).ok().map(|data| (name.clone(), data))) + .collect() + } +} + +impl std::fmt::Debug for ArchiveReader<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ArchiveReader") + .field("file_count", &self.index.len()) + .finish() + } +} + +/// High-performance streaming ZIP archive writer for Office document formats. +/// +/// This is the recommended writer for creating complete ZIP archives. +pub struct StreamingArchiveWriter { + archive: ZipArchiveWriter, +} + +impl StreamingArchiveWriter>> { + /// Create a new streaming archive writer that writes to memory. + pub fn new() -> Self { + Self { + archive: ZipArchiveWriter::new(std::io::Cursor::new(Vec::new())), + } + } + + /// Finish writing and return the ZIP archive bytes. + pub fn finish_to_bytes(self) -> Result, Error> { + let cursor = self.archive.finish()?; + Ok(cursor.into_inner()) + } +} + +impl StreamingArchiveWriter { + /// Create a new streaming archive writer with a custom writer. + pub fn with_writer(writer: W) -> Self { + Self { + archive: ZipArchiveWriter::new(writer), + } + } + + /// Write a file without compression (stored). + pub fn write_stored(&mut self, name: &str, data: &[u8]) -> Result<(), Error> { + self.archive.write_stored_file(name, data) + } + + /// Write a file with Deflate compression. 
+ pub fn write_deflated(&mut self, name: &str, data: &[u8]) -> Result<(), Error> { + let (mut entry, config) = self + .archive + .new_file(name) + .compression_method(CompressionMethod::Deflate) + .start()?; + + let encoder = DeflateEncoder::new(&mut entry, Compression::default()); + let mut writer = config.wrap(encoder); + writer.write_all(data)?; + let (encoder, desc) = writer.finish()?; + encoder.finish()?; + entry.finish(desc)?; + Ok(()) + } + + /// Finish writing the archive. + pub fn finish(self) -> Result { + self.archive.finish() + } +} + +impl Default for StreamingArchiveWriter>> { + fn default() -> Self { + Self::new() + } +} + +// Ensure ArchiveReader is Send + Sync for parallel iteration +// This is a compile-time assertion +const _: () = { + const fn assert_send_sync() {} + assert_send_sync::>(); +}; + +/// Lazy ZIP archive reader with on-demand decompression and caching. +/// +/// Unlike `ArchiveReader::read_all_parallel()` which decompresses everything upfront, +/// this reader decompresses files on-demand as they are accessed. This is optimal for: +/// - Large archives where only a subset of files are needed +/// - Pipelining decompression with parsing (process files as they become available) +/// - Reducing memory pressure by not holding all decompressed data at once +/// +/// The reader uses interior mutability for thread-safe caching of decompressed data. 
+/// +/// # Example +/// ```rust,no_run +/// use soapberry_zip::office::LazyArchiveReader; +/// +/// let data = std::fs::read("document.docx")?; +/// let archive = LazyArchiveReader::new(&data)?; +/// +/// // Files are decompressed on first access and cached +/// let content = archive.read("word/document.xml")?; +/// +/// // Subsequent reads return cached data (no re-decompression) +/// let content2 = archive.read("word/document.xml")?; +/// # Ok::<(), Box>(()) +/// ``` +pub struct LazyArchiveReader<'data> { + /// The underlying archive reader (for decompression) + inner: ArchiveReader<'data>, + /// Thread-safe cache of decompressed files + cache: std::sync::RwLock>>>, +} + +impl<'data> LazyArchiveReader<'data> { + /// Create a new lazy archive reader from a byte slice. + pub fn new(data: &'data [u8]) -> Result { + let inner = ArchiveReader::new(data)?; + Ok(Self { + inner, + cache: std::sync::RwLock::new(HashMap::new()), + }) + } + + /// Get the number of files in the archive. + #[inline] + pub fn len(&self) -> usize { + self.inner.len() + } + + /// Check if the archive is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + /// Check if a file exists in the archive. + #[inline] + pub fn contains(&self, name: &str) -> bool { + self.inner.contains(name) + } + + /// Get an iterator over all file names in the archive. + pub fn file_names(&self) -> impl Iterator { + self.inner.file_names() + } + + /// Read and decompress a file, using cache if available. + /// + /// Returns a cloned Vec for API compatibility. For zero-copy access, + /// use `read_shared()` which returns an Arc. + pub fn read(&self, name: &str) -> Result, Error> { + self.read_shared(name).map(|arc| (*arc).clone()) + } + + /// Read and decompress a file, returning a shared reference. + /// + /// This is more efficient than `read()` when the same file is accessed + /// multiple times, as it avoids cloning the decompressed data. 
+ pub fn read_shared(&self, name: &str) -> Result>, Error> { + let normalized = name.strip_prefix('/').unwrap_or(name); + + // Fast path: check if already cached (read lock) + { + let cache = self.cache.read().unwrap(); + if let Some(data) = cache.get(normalized) { + return Ok(std::sync::Arc::clone(data)); + } + } + + // Slow path: decompress and cache (write lock) + let data = self.inner.read(normalized)?; + let arc = std::sync::Arc::new(data); + + { + let mut cache = self.cache.write().unwrap(); + // Double-check in case another thread cached it while we were decompressing + if let Some(existing) = cache.get(normalized) { + return Ok(std::sync::Arc::clone(existing)); + } + cache.insert(normalized.to_string(), std::sync::Arc::clone(&arc)); + } + + Ok(arc) + } + + /// Read multiple files in parallel WITHOUT caching. + /// + /// This is the fastest method for bulk decompression when you need to read + /// many files at once and don't need caching. Avoids all cloning overhead. + /// + /// Returns a HashMap mapping file names to their decompressed contents. + /// Files that fail to decompress are not included in the result. + pub fn read_many_parallel(&self, names: &[&str]) -> HashMap> { + use rayon::prelude::*; + + // Parallel decompression without caching for maximum performance + names + .par_iter() + .filter_map(|name| { + let normalized = name.strip_prefix('/').unwrap_or(name); + self.inner + .read(normalized) + .ok() + .map(|data| (normalized.to_string(), data)) + }) + .collect() + } + + /// Read multiple files in parallel with caching. + /// + /// This efficiently decompresses multiple files in parallel while still + /// benefiting from caching. Files already in cache are returned immediately. + /// Use this when you expect to read the same files multiple times. + /// + /// Returns a HashMap mapping file names to their decompressed contents. + /// Files that fail to decompress are not included in the result. 
+ pub fn read_many_parallel_cached(&self, names: &[&str]) -> HashMap> { + use rayon::prelude::*; + + // Separate cached and uncached files + let (cached, uncached): (Vec<&str>, Vec<&str>) = { + let cache = self.cache.read().unwrap(); + names.iter().partition(|name| { + let normalized = name.strip_prefix('/').unwrap_or(*name); + cache.contains_key(normalized) + }) + }; + + // Start with cached results + let mut results: HashMap> = { + let cache = self.cache.read().unwrap(); + cached + .into_iter() + .filter_map(|name| { + let normalized = name.strip_prefix('/').unwrap_or(name); + cache + .get(normalized) + .map(|arc| (normalized.to_string(), (**arc).clone())) + }) + .collect() + }; + + // Decompress uncached files in parallel + if !uncached.is_empty() { + let decompressed: Vec<_> = uncached + .into_par_iter() + .filter_map(|name| { + let normalized = name.strip_prefix('/').unwrap_or(name); + self.inner + .read(normalized) + .ok() + .map(|data| (normalized.to_string(), data)) + }) + .collect(); + + // Cache the newly decompressed files + { + let mut cache = self.cache.write().unwrap(); + for (name, data) in &decompressed { + if !cache.contains_key(name.as_str()) { + cache.insert(name.clone(), std::sync::Arc::new(data.clone())); + } + } + } + + results.extend(decompressed); + } + + results + } + + /// Read all files in parallel, caching results. + /// + /// Similar to `ArchiveReader::read_all_parallel()` but caches results + /// for potential future access. + pub fn read_all_parallel(&self) -> HashMap> { + let names: Vec<&str> = self.inner.file_names().collect(); + self.read_many_parallel(&names) + } + + /// Get the number of cached files. + pub fn cache_size(&self) -> usize { + self.cache.read().unwrap().len() + } + + /// Clear the decompression cache to free memory. + pub fn clear_cache(&self) { + self.cache.write().unwrap().clear(); + } + + /// Take ownership of cached data, consuming the cache. + /// + /// Returns all cached files and clears the cache. 
This is useful when + /// you want to take ownership of the decompressed data without cloning. + pub fn take_cache(&self) -> HashMap> { + let mut cache = self.cache.write().unwrap(); + let mut result = HashMap::with_capacity(cache.len()); + for (name, arc) in cache.drain() { + // Try to unwrap the Arc; if there are other references, clone instead + match std::sync::Arc::try_unwrap(arc) { + Ok(data) => { + result.insert(name, data); + }, + Err(arc) => { + result.insert(name, (*arc).clone()); + }, + } + } + result + } +} + +impl std::fmt::Debug for LazyArchiveReader<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("LazyArchiveReader") + .field("file_count", &self.inner.len()) + .field("cache_size", &self.cache_size()) + .finish() + } +} + +// Ensure LazyArchiveReader is Send + Sync +const _: () = { + const fn assert_send_sync() {} + assert_send_sync::>(); +}; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_round_trip_stored() { + let mut writer = StreamingArchiveWriter::new(); + writer.write_stored("test.txt", b"Hello, World!").unwrap(); + let bytes = writer.finish_to_bytes().unwrap(); + + let reader = ArchiveReader::new(&bytes).unwrap(); + assert!(reader.contains("test.txt")); + assert_eq!(reader.read("test.txt").unwrap(), b"Hello, World!"); + } + + #[test] + fn test_round_trip_deflated() { + let mut writer = StreamingArchiveWriter::new(); + writer + .write_deflated("content.xml", b"Hello") + .unwrap(); + let bytes = writer.finish_to_bytes().unwrap(); + + let reader = ArchiveReader::new(&bytes).unwrap(); + assert!(reader.contains("content.xml")); + assert_eq!(reader.read("content.xml").unwrap(), b"Hello"); + } + + #[test] + fn test_multiple_files() { + let mut writer = StreamingArchiveWriter::new(); + writer + .write_stored("mimetype", b"application/test") + .unwrap(); + writer.write_deflated("content.xml", b"").unwrap(); + writer.write_deflated("styles.xml", b"").unwrap(); + let bytes = 
writer.finish_to_bytes().unwrap(); + + let reader = ArchiveReader::new(&bytes).unwrap(); + assert_eq!(reader.len(), 3); + assert_eq!(reader.read("mimetype").unwrap(), b"application/test"); + assert_eq!(reader.read("content.xml").unwrap(), b""); + assert_eq!(reader.read("styles.xml").unwrap(), b""); + } +} diff --git a/crates/soapberry-zip/src/path.rs b/crates/soapberry-zip/src/path.rs new file mode 100644 index 0000000..65727f3 --- /dev/null +++ b/crates/soapberry-zip/src/path.rs @@ -0,0 +1,475 @@ +//! Path handling for ZIP archives with type-safe raw and normalized paths. +//! +//! This module provides a comprehensive system for handling file paths from ZIP +//! archives with strong safety guarantees against path traversal attacks (zip +//! slip vulnerabilities). +//! +//! ## Path Types +//! +//! The main type is [`ZipFilePath`], which is generic over three possible path +//! types with different safety levels: +//! +//! - [`RawPath`]: Direct bytes from ZIP archive (⚠️ may contain malicious +//! paths) +//! - [`NormalizedPath`]: Validated and sanitized path +//! - [`NormalizedPathBuf`]: Owned version of normalized path +//! +//! ## Raw Paths +//! +//! Raw paths provide direct access to the original bytes from the ZIP file +//! without any validation. +//! +//! May contain the following: +//! +//! - Directory traversal: `../`, `..\\`, `..` sequences +//! - Absolute paths: `/etc/passwd`, `C:\\Windows\\system32` +//! - Invalid UTF-8: Arbitrary byte sequences that aren't valid text +//! +//! ## Normalized Paths +//! +//! Normalized paths have been validated and sanitized according to these rules: +//! +//! - Assumed to be UTF-8 ([zip file names aren't always +//! UTF-8](https://fasterthanli.me/articles/the-case-for-sans-io#character-encoding-differences)) +//! - Path separators: All backslashes (`\`) converted to forward slashes (`/`) +//! - Redundant slashes: Multiple consecutive slashes (`//`) reduced to single +//! slash +//! 
- Relative components: Current directory (`.`) and parent directory (`..`) +//! resolved +//! - Leading separators: Absolute paths made relative (`/foo` → `foo`) +//! - Drive letters: Windows drive prefixes removed (`C:\\foo` → `foo`) +//! - Escape prevention: Paths cannot escape the archive root directory +//! +//! ## Usage Examples +//! +//! ```rust +//! use soapberry_zip::path::ZipFilePath; +//! +//! // From raw bytes +//! let raw_path = ZipFilePath::from_bytes(b"../../../etc/passwd"); +//! let safe_path = raw_path.try_normalize()?; // Returns error if invalid UTF-8 +//! assert_eq!(safe_path.as_str(), "etc/passwd"); +//! +//! // From string +//! let normalized_path = ZipFilePath::from_str("dir\\file.txt"); +//! assert_eq!(normalized_path.as_str(), "dir/file.txt"); +//! assert_eq!(String::from(normalized_path), "dir/file.txt"); +//! +//! // Backslashes to forward slashes +//! let path = ZipFilePath::from_str("dir\\subdir\\file.txt"); +//! assert_eq!(path.as_str(), "dir/subdir/file.txt"); +//! +//! // Remove redundant slashes +//! let path = ZipFilePath::from_str("dir//subdir///file.txt"); +//! assert_eq!(path.as_str(), "dir/subdir/file.txt"); +//! +//! // Resolve relative components +//! let path = ZipFilePath::from_str("dir/../file.txt"); +//! assert_eq!(path.as_str(), "file.txt"); +//! +//! // Remove leading slashes (absolute → relative) +//! let path = ZipFilePath::from_str("/etc/passwd"); +//! assert_eq!(path.as_str(), "etc/passwd"); +//! +//! // Prevent directory traversal +//! let path = ZipFilePath::from_str("../../../etc/passwd"); +//! assert_eq!(path.as_str(), "etc/passwd"); +//! +//! // Get string from normalized path +//! let path = ZipFilePath::from_str("dir/file.txt"); +//! let my_str = String::from(path.into_owned()); +//! assert_eq!(my_str, String::from("dir/file.txt")); +//! +//! # Ok::<(), Box>(()) +//! ``` +//! +//! ## UTF-8 Encoding Detection +//! +//! The library automatically detects when paths contain characters that require +//! 
UTF-8 encoding in ZIP files (beyond the default CP-437 encoding). This +//! information is used internally when creating ZIP archives. + +use crate::{Error, ZipStr}; +use std::borrow::Cow; + +/// Raw path data directly from a ZIP archive. +/// +/// **Warning**: Contains unvalidated bytes that may include malicious path components. +/// Use [`ZipFilePath::try_normalize()`] to create a safe path. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct RawPath<'a>(ZipStr<'a>); + +impl AsRef<[u8]> for RawPath<'_> { + #[inline] + fn as_ref(&self) -> &[u8] { + self.0.as_bytes() + } +} + +/// A normalized and sanitized path from a ZIP archive. +/// +/// This path has been validated and sanitized according to the normalization +/// rules described in the module documentation. +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct NormalizedPath<'a>(Cow<'a, str>); + +impl AsRef<[u8]> for NormalizedPath<'_> { + #[inline] + fn as_ref(&self) -> &[u8] { + self.0.as_bytes() + } +} + +impl AsRef for NormalizedPath<'_> { + #[inline] + fn as_ref(&self) -> &str { + self.0.as_ref() + } +} + +/// An owned, normalized path from a ZIP archive. +/// +/// Owned version of [`NormalizedPath`] with the same safety guarantees. +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct NormalizedPathBuf(String); + +impl AsRef<[u8]> for NormalizedPathBuf { + #[inline] + fn as_ref(&self) -> &[u8] { + self.0.as_bytes() + } +} + +impl AsRef for NormalizedPathBuf { + #[inline] + fn as_ref(&self) -> &str { + &self.0 + } +} + +/// Type-safe wrapper for ZIP archive file paths. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct ZipFilePath { + data: R, +} + +impl ZipFilePath<()> { + /// Creates a raw path from bytes. + /// + /// **Warning**: The resulting path is unvalidated. Use [`ZipFilePath::try_normalize()`] + /// to create a safe path. 
+ #[inline] + pub fn from_bytes(data: &[u8]) -> ZipFilePath> { + ZipFilePath { + data: RawPath(ZipStr::new(data)), + } + } + + /// Creates a normalized path from a UTF-8 string. + /// + /// The path is automatically normalized according to the rules described in the module + /// documentation. When possible, the original string reference is preserved to avoid allocation. + #[inline] + #[allow(clippy::should_implement_trait)] // Can't implement FromStr due to lifetime issues + pub fn from_str(mut name: &str) -> ZipFilePath> { + let mut last = 0; + for &c in name.as_bytes() { + if matches!( + (c, last), + (b'\\', _) | (b'/', b'/') | (b'.', b'.') | (b'.', b'/') | (b':', _) + ) { + // slow path: intrusive string manipulations required + return ZipFilePath { + data: NormalizedPath(Cow::Owned(Self::normalize_alloc(name))), + }; + } + last = c; + } + + loop { + // Fast path: before we trim, do a quick check if they are even necessary. + name = match name.as_bytes() { + [b'.', b'.', b'/', ..] => name.trim_start_matches("../"), + [b'.', b'/', ..] => name.trim_start_matches("./"), + [b'/', ..] => name.trim_start_matches('/'), + _ => { + return ZipFilePath { + data: NormalizedPath(Cow::Borrowed(name)), + }; + }, + } + } + } + + fn normalize_alloc(s: &str) -> String { + // 4.4.17.1 All slashes MUST be forward slashes '/' + let s = s.replace('\\', "/"); + + // 4.4.17.1 MUST NOT contain a drive or device letter + let s = s.split(':').next_back().unwrap_or_default(); + + // resolve path components + let splits = s.split('/'); + let mut result = String::new(); + for split in splits { + if split.is_empty() || split == "." { + continue; + } + + if split == ".." { + let last = result.rfind('/'); + result.truncate(last.unwrap_or(0)); + continue; + } + + if !result.is_empty() { + result.push('/'); + } + + result.push_str(split); + } + + result + } +} + +impl ZipFilePath +where + R: AsRef<[u8]>, +{ + /// Returns true if the file path represents a directory. 
+ /// + /// Determined by the path ending with a forward slash (`/`). + #[inline] + pub fn is_dir(&self) -> bool { + self.data.as_ref().last() == Some(&b'/') + } + + /// Returns the length of the path in bytes. + #[inline] + pub fn len(&self) -> usize { + self.data.as_ref().len() + } + + /// Returns true if the path is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.data.as_ref().is_empty() + } +} + +impl ZipFilePath +where + R: AsRef, +{ + /// Determines if the path requires UTF-8 encoding based on CP-437 compatibility. + /// + /// Returns `true` if the path contains characters that cannot be represented in CP-437 + /// (the default ZIP encoding), requiring the UTF-8 flag to be set in the ZIP file. + pub(crate) fn needs_utf8_encoding(&self) -> bool { + for ch in self.data.as_ref().chars() { + let code_point = ch as u32; + + // Forbid 0x7e (~) and 0x5c (\) since EUC-KR and Shift-JIS replace those + // characters with localized currency and overline characters. + // Also forbid control characters (< 0x20) and characters above 0x7d. + if !(0x20..=0x7d).contains(&code_point) || code_point == 0x5c { + return true; + } + } + + false + } +} + +impl<'a> ZipFilePath> { + /// Returns the raw bytes of the zip file path. + #[inline] + pub fn as_bytes(&self) -> &'a [u8] { + self.data.0.as_bytes() + } + + /// Attempts to normalize this raw path into a safe, validated path. + /// + /// Validates the raw bytes as UTF-8 and applies normalization rules. + /// + /// # Errors + /// + /// Returns an error if the file path contains invalid UTF-8 sequences. 
+ #[inline] + pub fn try_normalize(self) -> Result>, Error> { + let raw_data = self.data.0; + let name = std::str::from_utf8(raw_data.as_bytes()).map_err(Error::utf8)?; + Ok(ZipFilePath::from_str(name)) + } +} + +impl AsRef<[u8]> for ZipFilePath> { + #[inline] + fn as_ref(&self) -> &[u8] { + self.data.0.as_bytes() + } +} + +impl AsRef for ZipFilePath> { + #[inline] + fn as_ref(&self) -> &str { + self.data.0.as_ref() + } +} + +impl AsRef for ZipFilePath { + #[inline] + fn as_ref(&self) -> &str { + self.data.0.as_ref() + } +} + +impl From> for String { + #[inline] + fn from(path: ZipFilePath) -> Self { + path.data.0 + } +} + +impl From>> for String { + #[inline] + fn from(path: ZipFilePath>) -> Self { + path.data.0.into_owned() + } +} + +impl ZipFilePath> { + /// Returns the normalized string slice. + #[inline] + pub fn as_str(&self) -> &str { + self.data.0.as_ref() + } + + /// Converts this borrowed path into an owned path. + /// + /// Similar to [`Cow::into_owned`] + #[inline] + pub fn into_owned(self) -> ZipFilePath { + ZipFilePath { + data: NormalizedPathBuf(self.data.0.into_owned()), + } + } +} + +impl ZipFilePath { + /// Returns the normalized string slice. 
+ #[inline] + pub fn as_str(&self) -> &str { + self.data.0.as_ref() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use rstest::rstest; + + #[rstest] + #[case(b"test.txt", "test.txt")] + #[case(b"dir/test.txt", "dir/test.txt")] + #[case(b"dir\\test.txt", "dir/test.txt")] + #[case(b"dir//test.txt", "dir/test.txt")] + #[case(b"/test.txt", "test.txt")] + #[case(b"../test.txt", "test.txt")] + #[case(b"dir/../test.txt", "test.txt")] + #[case(b"./test.txt", "test.txt")] + #[case(b"dir/./test.txt", "dir/test.txt")] + #[case(b"dir/./../test.txt", "test.txt")] + #[case(b"dir/sub/../test.txt", "dir/test.txt")] + #[case(b"dir/../../test.txt", "test.txt")] + #[case(b"../../../test.txt", "test.txt")] + #[case(b"a/b/../../test.txt", "test.txt")] + #[case(b"a/b/c/../../../test.txt", "test.txt")] + #[case(b"a/b/c/d/../../test.txt", "a/b/test.txt")] + #[case(b"C:\\hello\\test.txt", "hello/test.txt")] + #[case(b"C:/hello\\test.txt", "hello/test.txt")] + #[case(b"C:/hello/test.txt", "hello/test.txt")] + fn test_zip_path_normalized(#[case] input: &[u8], #[case] expected: &str) { + assert_eq!( + ZipFilePath::from_bytes(input) + .try_normalize() + .unwrap() + .as_ref(), + expected + ); + } + + #[rstest] + #[case(&[0xFF])] + #[case(&[b't', b'e', b's', b't', 0xFF])] + fn test_zip_path_normalized_invalid_utf8(#[case] input: &[u8]) { + assert!(ZipFilePath::from_bytes(input).try_normalize().is_err()); + } + + #[rstest] + #[case("test.txt", false)] + #[case("hello_world", false)] + #[case("file.name.ext", false)] + #[case("hello!", false)] + #[case("hello{world}", false)] + #[case("hello|world", false)] + #[case("hello`world", false)] + #[case("hello\"world", false)] + #[case("hello", false)] + #[case("hello;world", false)] + #[case("hello:world", false)] + #[case("hello^world", false)] + #[case("hello\u{00A0}world", true)] + #[case("hello\u{0080}world", true)] + #[case("hello\u{00FF}world", true)] + #[case("hello\u{0100}world", true)] + #[case("hello\u{03B1}world", true)] + 
#[case("hello\u{4E00}world", true)] + #[case("hello\u{1F600}world", true)] + #[case(r"hello\world", false)] // Backslash gets normalized to forward slash + #[case("hello~world", true)] + #[case("hello\u{007F}world", true)] + #[case("hello\u{001F}world", true)] + #[case("hello\u{0000}world", true)] + #[case("hello\u{0001}world", true)] + #[case("hello\u{000A}world", true)] + #[case("hello\u{000D}world", true)] + #[case("hello\u{0009}world", true)] + #[case("", false)] + #[case(" ", false)] + #[case("hello\u{007E}world", true)] + #[case("hello\u{007D}world", false)] + fn test_needs_utf8_encoding(#[case] input: &str, #[case] expected: bool) { + let path = ZipFilePath::from_str(input); + assert_eq!( + path.needs_utf8_encoding(), + expected, + "Failed for input: {}", + input + ); + } + + #[test] + fn test_path_lifetime_test() { + let normalized_path = ZipFilePath::from_bytes(b"test.txt") + .try_normalize() + .unwrap(); + assert_eq!(normalized_path.as_ref(), "test.txt"); + assert_eq!(normalized_path.len(), 8); + } + + #[test] + fn test_raw_path_lifetime_preservation() { + use std::str::Utf8Error; + + // See https://github.com/nickbabcock/rawzip/issues/101 + fn file_path_utf8<'a>(path: ZipFilePath>) -> Result<&'a str, Utf8Error> { + std::str::from_utf8(path.as_bytes()) + } + + let raw_path = ZipFilePath::from_bytes(b"test/file.txt"); + let result = file_path_utf8(raw_path).unwrap(); + assert_eq!(result, "test/file.txt"); + } +} diff --git a/crates/soapberry-zip/src/reader_at.rs b/crates/soapberry-zip/src/reader_at.rs new file mode 100644 index 0000000..fb76b06 --- /dev/null +++ b/crates/soapberry-zip/src/reader_at.rs @@ -0,0 +1,533 @@ +use crate::errors::{Error, ErrorKind}; +use std::io::Read; +use std::ops::Range; +#[cfg(unix)] +use std::os::unix::fs::FileExt; +#[cfg(windows)] +use std::os::windows::fs::FileExt; +use std::{rc::Rc, sync::Arc}; + +/// Provides reading bytes at a specific offset +/// +/// This trait is similar to [`std::io::Read`] but with an additional 
offset +/// parameter that signals where the read should begin offset from the start of +/// the data. This allows methods to not require a mutable reference to the +/// reader, which is critical for zip files to easily offer decompression of +/// multiple files simultaneously without needing to store them in memory. +/// +/// This trait is modelled after Go's +/// [`io.ReaderAt`](https://pkg.go.dev/io#ReaderAt) interface, which is used by +/// their own [Zip implementation](https://pkg.go.dev/archive/zip#NewReader). +pub trait ReaderAt { + /// Read bytes from the reader at a specific offset + fn read_at(&self, buf: &mut [u8], offset: u64) -> std::io::Result; + + /// Sibling to [`read_exact`](std::io::Read::read_exact), but at an offset + fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> std::io::Result<()> { + let mut read = 0; + while read < buf.len() { + let latest = self.read_at(&mut buf[read..], offset + (read as u64))?; + if latest == 0 { + return Err(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "failed to fill whole buffer", + )); + } + read += latest; + } + Ok(()) + } +} + +pub(crate) trait ReaderAtExt { + fn try_read_at_least_at( + &self, + buffer: &mut [u8], + size: usize, + offset: u64, + ) -> std::io::Result; + + fn read_at_least_at(&self, buffer: &mut [u8], size: usize, offset: u64) + -> Result; +} + +impl ReaderAtExt for T { + fn try_read_at_least_at( + &self, + buffer: &mut [u8], + mut size: usize, + offset: u64, + ) -> std::io::Result { + size = size.min(buffer.len()); + let mut pos = 0; + while pos < size { + let read = self.read_at(&mut buffer[pos..], offset + pos as u64)?; + if read == 0 { + return Ok(pos); + } + pos += read; + } + Ok(pos) + } + + fn read_at_least_at( + &self, + buffer: &mut [u8], + size: usize, + offset: u64, + ) -> Result { + if buffer.len() < size { + return Err(Error::from(ErrorKind::BufferTooSmall)); + } + + let read = self.try_read_at_least_at(buffer, size, offset)?; + + if read < size { + return 
Err(Error::from(ErrorKind::Eof)); + } + + Ok(read) + } +} + +#[cfg(not(any(unix, windows)))] +#[derive(Debug)] +pub struct FileReader(MutexReader); + +/// A file wrapper that implements [`ReaderAt`] across platforms. +#[cfg(any(unix, windows))] +#[derive(Debug)] +pub struct FileReader(std::fs::File); + +impl FileReader { + pub fn into_inner(self) -> std::fs::File { + #[cfg(not(any(unix, windows)))] + return self.0.into_inner(); + #[cfg(any(unix, windows))] + return self.0; + } +} + +impl ReaderAt for FileReader { + #[inline] + fn read_at(&self, buf: &mut [u8], offset: u64) -> std::io::Result { + #[cfg(unix)] + return self.0.read_at(buf, offset); + #[cfg(windows)] + return self.0.seek_read(buf, offset); + #[cfg(not(any(unix, windows)))] + return self.0.read_at(buf, offset); + } +} + +impl std::io::Seek for FileReader { + #[inline] + fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result { + self.0.seek(pos) + } +} + +impl From for FileReader { + #[cfg(not(any(unix, windows)))] + fn from(file: std::fs::File) -> Self { + Self(MutexReader(std::sync::Mutex::new(file))) + } + + #[cfg(any(unix, windows))] + fn from(file: std::fs::File) -> Self { + Self(file) + } +} + +/// A reader that is wrapped in a mutex to allow for concurrent reads. +#[derive(Debug)] +pub struct MutexReader(std::sync::Mutex); + +impl MutexReader { + pub fn new(inner: R) -> Self { + Self(std::sync::Mutex::new(inner)) + } + + pub fn into_inner(self) -> R { + self.0.into_inner().unwrap() + } +} + +impl ReaderAt for MutexReader +where + R: std::io::Read + std::io::Seek, +{ + /// For seekable implementations, we can emulate the read_at method by + /// seeking to the offset, reading the data, and then seeking back to the + /// original position within a mutex. 
+ /// + /// This is how Go implements the `io.ReaderAt` interface for filed on + /// Windows: + /// https://github.com/golang/go/blob/70b603f4d295573197b43ad090d7cad21895144e/src/internal/poll/fd_windows.go#L525 + fn read_at(&self, buf: &mut [u8], offset: u64) -> std::io::Result { + let mut lock = self.0.lock().unwrap(); + let original_position = lock.stream_position()?; + lock.seek(std::io::SeekFrom::Start(offset))?; + let result = lock.read(buf); + lock.seek(std::io::SeekFrom::Start(original_position))?; + result + } +} + +impl std::io::Read for MutexReader +where + R: std::io::Read, +{ + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + self.0.lock().unwrap().read(buf) + } +} + +impl std::io::Seek for MutexReader +where + R: std::io::Seek, +{ + fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result { + self.0.lock().unwrap().seek(pos) + } +} + +impl ReaderAt for &'_ T { + #[inline] + fn read_at(&self, buf: &mut [u8], offset: u64) -> std::io::Result { + (*self).read_at(buf, offset) + } +} + +impl ReaderAt for &'_ mut T { + #[inline] + fn read_at(&self, buf: &mut [u8], offset: u64) -> std::io::Result { + (**self).read_at(buf, offset) + } +} + +impl ReaderAt for &[u8] { + #[inline] + fn read_at(&self, buf: &mut [u8], offset: u64) -> std::io::Result { + let skip = self.len().min(offset as usize); + let data = &self[skip..]; + let len = data.len().min(buf.len()); + buf[..len].copy_from_slice(&data[..len]); + Ok(len) + } +} + +impl ReaderAt for std::io::Cursor +where + R: AsRef<[u8]>, +{ + #[inline] + fn read_at(&self, buf: &mut [u8], offset: u64) -> std::io::Result { + let data = self.get_ref().as_ref(); + data.read_at(buf, offset) + } +} + +impl ReaderAt for Vec { + #[inline] + fn read_at(&self, buf: &mut [u8], offset: u64) -> std::io::Result { + self.as_slice().read_at(buf, offset) + } +} + +impl ReaderAt for Arc { + #[inline] + fn read_at(&self, buf: &mut [u8], offset: u64) -> std::io::Result { + (**self).read_at(buf, offset) + } +} + +impl ReaderAt 
for Rc { + #[inline] + fn read_at(&self, buf: &mut [u8], offset: u64) -> std::io::Result { + (**self).read_at(buf, offset) + } +} + +impl ReaderAt for Box { + #[inline] + fn read_at(&self, buf: &mut [u8], offset: u64) -> std::io::Result { + (**self).read_at(buf, offset) + } +} + +/// A reader that reads a specific range of data from a [`ReaderAt`] source. +/// +/// `RangeReader` implements [`std::io::Read`] and provides bounded reading +/// within a specified range of offsets. It maintains its current position and +/// ensures reads don't exceed the defined end boundary. +/// +/// Useful when working with APIs that operate on [`std::io::Read`] instead of +/// [`ReaderAt`]. For instance, incrementally reading large prelude and trailing +/// data of a ZIP file. +/// +/// # Examples +/// +/// Reading prelude data from a zip file: +/// +/// ``` +/// use std::io::Read; +/// use soapberry_zip::{ZipArchive, RangeReader, RECOMMENDED_BUFFER_SIZE}; +/// use std::fs::File; +/// +/// let file = File::open("assets/test-prefix.zip")?; +/// let mut buffer = vec![0u8; RECOMMENDED_BUFFER_SIZE]; +/// let archive = ZipArchive::from_file(file, &mut buffer)?; +/// +/// // Typically you only need the first entry to find where the zip data starts +/// // but this is the longer form that examines every entry in case they are +/// // out of order +/// let mut zip_start_offset = archive.directory_offset(); +/// let mut entries = archive.entries(&mut buffer); +/// while let Some(entry) = entries.next_entry()? 
{ +/// zip_start_offset = zip_start_offset.min(entry.local_header_offset()); +/// } +/// +/// // For example purposes, just slurp up all the prelude data +/// let mut prelude_reader = RangeReader::new(archive.get_ref(), 0..zip_start_offset); +/// prelude_reader.read_exact(&mut buffer[..zip_start_offset as usize])?; +/// assert_eq!( +/// &buffer[..zip_start_offset as usize], +/// b"prefix that could be an executable jar file" +/// ); +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Debug, Clone)] +pub struct RangeReader { + archive: R, + offset: u64, + end_offset: u64, +} + +impl RangeReader { + /// Creates a new `RangeReader` that will read data from the specified range. + #[inline] + pub fn new(archive: R, range: Range) -> Self { + Self { + archive, + offset: range.start, + end_offset: range.end, + } + } + + /// Returns the current read position within the range. + #[inline] + pub fn position(&self) -> u64 { + self.offset + } + + /// Returns the remaining bytes that are expected to be read from the + /// current position. + /// + /// When a range reader is constructed with a range that exceeds the + /// underlying reader, remaining will be non-zero when `read()` returns zero + /// signalling the end of the stream. + #[inline] + pub fn remaining(&self) -> u64 { + self.end_offset - self.offset + } + + /// Returns the end offset of the range. + #[inline] + pub fn end_offset(&self) -> u64 { + self.end_offset + } + + /// Returns a reference to the underlying reader. + #[inline] + pub fn get_ref(&self) -> &R { + &self.archive + } + + /// Consumes the self and returns the underlying reader. 
+ #[inline] + pub fn into_inner(self) -> R { + self.archive + } +} + +impl Read for RangeReader +where + R: ReaderAt, +{ + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + let read_size = buf.len().min(self.remaining() as usize); + let read = self.archive.read_at(&mut buf[..read_size], self.offset)?; + self.offset += read as u64; + Ok(read) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Cursor; + + const TEST_DATA: &[u8] = b"Hello, World! This is test data for ReaderAt implementations."; + + fn test_reader_at_impl(reader: R, data_len: usize) { + let mut buf = [0u8; 5]; + + // Test reading from start + assert_eq!(reader.read_at(&mut buf, 0).unwrap(), 5); + assert_eq!(&buf, b"Hello"); + + // Test reading from offset + buf.fill(0); + assert_eq!(reader.read_at(&mut buf, 7).unwrap(), 5); + assert_eq!(&buf, b"World"); + + // Test read beyond data length + buf.fill(0); + let bytes_read = reader.read_at(&mut buf, data_len as u64).unwrap(); + assert_eq!(bytes_read, 0); + + // Test partial read at end of data + buf.fill(0); + let bytes_read = reader.read_at(&mut buf, (data_len - 3) as u64).unwrap(); + assert_eq!(bytes_read, 3); + assert_eq!(&buf[..3], &TEST_DATA[data_len - 3..]); + } + + #[test] + fn test_smart_pointer_implementations() { + let data = TEST_DATA.to_vec(); + + // Test Arc> + let arc_reader = Arc::new(data.clone()); + test_reader_at_impl(&*arc_reader, data.len()); + test_reader_at_impl(arc_reader, data.len()); + + // Test Rc> + let rc_reader = Rc::new(data.clone()); + test_reader_at_impl(&*rc_reader, data.len()); + test_reader_at_impl(rc_reader, data.len()); + + // Test Box> + let box_reader = Box::new(data.clone()); + test_reader_at_impl(&*box_reader, data.len()); + test_reader_at_impl(box_reader, data.len()); + } + + #[test] + fn test_reference_implementations() { + let mut data = TEST_DATA.to_vec(); + let data_len = data.len(); + + test_reader_at_impl(&data, data_len); + test_reader_at_impl(&mut data, data_len); + } + + #[test] + 
fn test_byte_slice_implementation() { + let data = TEST_DATA; + test_reader_at_impl(data, data.len()); + } + + #[test] + fn test_cursor_implementation() { + let data = TEST_DATA.to_vec(); + let cursor = Cursor::new(data.clone()); + test_reader_at_impl(&cursor, data.len()); + } + + #[test] + fn test_vec_implementation() { + let data = TEST_DATA.to_vec(); + test_reader_at_impl(&data, data.len()); + } + + #[test] + fn test_range_reader_basic() { + let data = b"Hello, World! This is test data."; + let mut range_reader = RangeReader::new(data.as_slice(), 7..13); + + let mut buffer = [0u8; 10]; + let bytes_read = range_reader.read(&mut buffer).unwrap(); + + assert_eq!(bytes_read, 6); + assert_eq!(&buffer[..bytes_read], b"World!"); + } + + #[test] + fn test_range_reader_multiple_reads() { + let data = b"0123456789"; + let mut range_reader = RangeReader::new(data.as_slice(), 2..8); + + let mut buffer = [0u8; 3]; + let bytes_read1 = range_reader.read(&mut buffer).unwrap(); + assert_eq!(bytes_read1, 3); + assert_eq!(&buffer[..bytes_read1], b"234"); + assert_eq!(range_reader.position(), 5); + + let bytes_read2 = range_reader.read(&mut buffer).unwrap(); + assert_eq!(bytes_read2, 3); + assert_eq!(&buffer[..bytes_read2], b"567"); + assert_eq!(range_reader.position(), 8); + + // Should return 0 when at end + let bytes_read3 = range_reader.read(&mut buffer).unwrap(); + assert_eq!(bytes_read3, 0); + } + + #[test] + fn test_range_reader_empty_range() { + let data = b"Hello, World!"; + let mut range_reader = RangeReader::new(data.as_slice(), 5..5); + + let mut buffer = [0u8; 10]; + let bytes_read = range_reader.read(&mut buffer).unwrap(); + + assert_eq!(bytes_read, 0); + assert_eq!(range_reader.remaining(), 0); + } + + #[test] + fn test_range_reader_get_ref_and_into_inner() { + let data = b"Hello, World!"; + let range_reader = RangeReader::new(data.as_slice(), 0..5); + + assert_eq!(range_reader.get_ref(), &data.as_slice()); + let inner = range_reader.into_inner(); + assert_eq!(inner, 
data.as_slice()); + } + + #[test] + fn test_range_reader_clone() { + let data = b"Hello, World!"; + let range_reader = RangeReader::new(data.as_slice(), 0..5); + let cloned = range_reader.clone(); + + assert_eq!(range_reader.position(), cloned.position()); + assert_eq!(range_reader.remaining(), cloned.remaining()); + } + + #[test] + fn test_range_reader_range_exceeds_data() { + let data = b"Hello"; + + // Test range that starts within data but extends beyond + let mut reader1 = RangeReader::new(data.as_slice(), 3..10); + let mut buf1 = [0u8; 10]; + let read1 = reader1.read(&mut buf1).unwrap(); + assert_eq!(read1, 2); // Only reads "lo" + assert_eq!(&buf1[..read1], b"lo"); + + // Test range that starts at end of data + let mut reader2 = RangeReader::new(data.as_slice(), 5..10); + let mut buf2 = [0u8; 10]; + let read2 = reader2.read(&mut buf2).unwrap(); + assert_eq!(read2, 0); // No data to read + + // Test range that starts beyond data + let mut reader3 = RangeReader::new(data.as_slice(), 10..20); + let mut buf3 = [0u8; 10]; + let read3 = reader3.read(&mut buf3).unwrap(); + assert_eq!(read3, 0); // No data to read + } +} diff --git a/crates/soapberry-zip/src/time.rs b/crates/soapberry-zip/src/time.rs new file mode 100644 index 0000000..a5cd358 --- /dev/null +++ b/crates/soapberry-zip/src/time.rs @@ -0,0 +1,1232 @@ +//! ZIP file timestamp handling +//! +//! Datetimes for ZIP files come in two flavors: UTC and local time. It is not +//! possible for the local time zone to be encoded in the ZIP format, so +//! converting between the two requires assuming that UTC is the local time. +//! +//! When reading a ZIP file, [`ZipDateTimeKind`] will provide information about +//! the timestamp's original time zone (UTC and local time) +//! +//! However, when writing a ZIP file, only a [`UtcDateTime`] is supported. +//! +//! # Example: Copying Modification Times +//! +//! This example shows how to read a ZIP file and create a new one while +//! 
preserving modification times: +//! +//! ``` +//! use soapberry_zip::{ZipArchive, ZipArchiveWriter, ZipDataWriter}; +//! use soapberry_zip::time::{ZipDateTimeKind, UtcDateTime}; +//! use std::io::Write; +//! +//! // Read a test ZIP file with timestamps +//! let input_data = include_bytes!("../assets/time-go.zip"); +//! let input_archive = ZipArchive::from_slice(input_data).unwrap(); +//! +//! // Create output archive +//! let mut output_data = Vec::new(); +//! let mut output_archive = ZipArchiveWriter::new(&mut output_data); +//! +//! // Copy each entry with its modification time +//! let mut entries = input_archive.entries(); +//! while let Ok(Some(entry)) = entries.next_entry() { +//! let name = entry.file_path().try_normalize().unwrap().as_ref().to_string(); +//! let modification_time = entry.last_modified(); +//! +//! let utc_time = match modification_time { +//! ZipDateTimeKind::Utc(utc_time) => utc_time, +//! ZipDateTimeKind::Local(local_time) => { +//! // Convert local time to UTC by reinterpreting the components +//! // This treats the local time as if it were UTC +//! UtcDateTime::from_components( +//! local_time.year(), +//! local_time.month(), +//! local_time.day(), +//! local_time.hour(), +//! local_time.minute(), +//! local_time.second(), +//! local_time.nanosecond() +//! ).unwrap() +//! } +//! }; +//! +//! if !entry.is_dir() { +//! // Copy file with preserved modification time +//! let (mut entry, config) = output_archive.new_file(&name) +//! .last_modified(utc_time) +//! .start() +//! .unwrap(); +//! let mut writer = config.wrap(&mut entry); +//! writer.write_all(b"example data").unwrap(); +//! let (_, descriptor) = writer.finish().unwrap(); +//! entry.finish(descriptor).unwrap(); +//! } else { +//! // Copy directory with preserved modification time +//! output_archive.new_dir(&name) +//! .last_modified(utc_time) +//! .create() +//! .unwrap(); +//! } +//! } +//! +//! output_archive.finish().unwrap(); +//! +//! 
// Verify the output archive preserves timestamps +//! let output_archive = ZipArchive::from_slice(&output_data).unwrap(); +//! +//! assert!(output_archive.entries_hint() > 0, "Output should contain entries"); +//! +//! // Verify at least one entry has a UTC timestamp +//! let mut output_entries = output_archive.entries(); +//! let mut has_utc_timestamp = false; +//! while let Ok(Some(entry)) = output_entries.next_entry() { +//! if matches!(entry.last_modified(), ZipDateTimeKind::Utc(_)) { +//! has_utc_timestamp = true; +//! break; +//! } +//! } +//! assert!(has_utc_timestamp, "Output should contain UTC timestamps"); +//! ``` + +use crate::{ + extra_fields::{ExtraFieldId, ExtraFields}, + utils::{le_u16, le_u32, le_u64}, +}; + +/// Represents the time zone of a timestamp. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum TimeZone { + /// UTC (Coordinated Universal Time) + Utc, + /// Local time (timezone unknown) + Local, +} + +/// Marker type for UTC timezone +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Utc; + +/// Marker type for Local timezone +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Local; + +/// Trait for timezone markers +pub trait TimeZoneMarker { + fn timezone() -> TimeZone; +} + +impl TimeZoneMarker for Utc { + fn timezone() -> TimeZone { + TimeZone::Utc + } +} + +impl TimeZoneMarker for Local { + fn timezone() -> TimeZone { + TimeZone::Local + } +} + +/// Represents a timestamp found in a ZIP file +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct ZipDateTime { + year: u16, + month: u8, // 1-12 + day: u8, // 1-31 + hour: u8, // 0-23 + minute: u8, // 0-59 + second: u8, // 0-59 + nanosecond: u32, // 0-999,999,999 + _timezone: std::marker::PhantomData, +} + +/// Type alias for UTC timestamps +pub type UtcDateTime = ZipDateTime; + +/// Type alias for Local timestamps +pub type LocalDateTime = ZipDateTime; + +/// Enum for timestamp parsing results that 
can be either UTC or Local +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ZipDateTimeKind { + Utc(UtcDateTime), + Local(LocalDateTime), +} + +impl ZipDateTimeKind { + /// Returns the timezone of this timestamp + #[must_use] + pub const fn timezone(&self) -> TimeZone { + match self { + ZipDateTimeKind::Utc(_) => TimeZone::Utc, + ZipDateTimeKind::Local(_) => TimeZone::Local, + } + } + + /// Returns the year component of the timestamp + #[must_use] + pub fn year(&self) -> u16 { + match self { + ZipDateTimeKind::Utc(dt) => dt.year(), + ZipDateTimeKind::Local(dt) => dt.year(), + } + } + + /// Returns the month component (1-12) of the timestamp + #[must_use] + pub fn month(&self) -> u8 { + match self { + ZipDateTimeKind::Utc(dt) => dt.month(), + ZipDateTimeKind::Local(dt) => dt.month(), + } + } + + /// Returns the day component (1-31) of the timestamp + #[must_use] + pub fn day(&self) -> u8 { + match self { + ZipDateTimeKind::Utc(dt) => dt.day(), + ZipDateTimeKind::Local(dt) => dt.day(), + } + } + + /// Returns the hour component (0-23) of the timestamp + #[must_use] + pub fn hour(&self) -> u8 { + match self { + ZipDateTimeKind::Utc(dt) => dt.hour(), + ZipDateTimeKind::Local(dt) => dt.hour(), + } + } + + /// Returns the minute component (0-59) of the timestamp + #[must_use] + pub fn minute(&self) -> u8 { + match self { + ZipDateTimeKind::Utc(dt) => dt.minute(), + ZipDateTimeKind::Local(dt) => dt.minute(), + } + } + + /// Returns the second component (0-59) of the timestamp + #[must_use] + pub fn second(&self) -> u8 { + match self { + ZipDateTimeKind::Utc(dt) => dt.second(), + ZipDateTimeKind::Local(dt) => dt.second(), + } + } + + /// Returns the nanosecond component (0-999,999,999) of the timestamp + #[must_use] + pub fn nanosecond(&self) -> u32 { + match self { + ZipDateTimeKind::Utc(dt) => dt.nanosecond(), + ZipDateTimeKind::Local(dt) => dt.nanosecond(), + } + } +} + +impl std::fmt::Display for ZipDateTimeKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> 
std::fmt::Result { + match self { + ZipDateTimeKind::Utc(dt) => dt.fmt(f), + ZipDateTimeKind::Local(dt) => dt.fmt(f), + } + } +} + +impl std::fmt::Display for ZipDateTime { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // Write out the date and time in ISO 8601 format. RFC 3339 requires a + // time zone, which we won't have for local times. + write!( + f, + "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}", + self.year, self.month, self.day, self.hour, self.minute, self.second + )?; + if self.nanosecond != 0 { + write!(f, ".{:09}", self.nanosecond)?; + } + match TZ::timezone() { + TimeZone::Utc => write!(f, "Z"), + TimeZone::Local => Ok(()), + } + } +} + +impl ZipDateTime { + /// Creates a ZipDateTime from date/time components with validation. + /// + /// # Arguments + /// + /// * `year` - Year (1-65535) + /// * `month` - Month (1-12) + /// * `day` - Day of month (1-31, validated against month) + /// * `hour` - Hour (0-23) + /// * `minute` - Minute (0-59) + /// * `second` - Second (0-59) + /// * `nanosecond` - Nanosecond (0-999,999,999), defaults to 0 + /// + /// # Errors + /// + /// Returns `None` if any component is invalid or the date doesn't exist + /// (e.g. February 30th, April 31st). 
+ /// + /// # Examples + /// + /// ``` + /// # use soapberry_zip::time::{UtcDateTime, LocalDateTime}; + /// let utc_datetime = UtcDateTime::from_components( + /// 2023, 6, 15, 14, 30, 45, 500_000_000 + /// ).unwrap(); + /// assert_eq!(utc_datetime.year(), 2023); + /// assert_eq!(utc_datetime.nanosecond(), 500_000_000); + /// + /// // Invalid date returns None + /// assert!(UtcDateTime::from_components(2023, 2, 30, 0, 0, 0, 0).is_none()); + /// ``` + pub fn from_components( + year: u16, + month: u8, + day: u8, + hour: u8, + minute: u8, + second: u8, + nanosecond: u32, + ) -> Option { + // Validate components + if year == 0 + || month == 0 + || month > 12 + || day == 0 + || hour > 23 + || minute > 59 + || second > 59 + || nanosecond > 999_999_999 + { + return None; + } + + let max_day = last_day_of_month(year, month); + if day > max_day { + return None; + } + + Some(Self { + year, + month, + day, + hour, + minute, + second, + nanosecond, + _timezone: std::marker::PhantomData, + }) + } + + /// Returns the year component of the timestamp. + #[must_use] + pub const fn year(&self) -> u16 { + self.year + } + + /// Returns the month component (1-12) of the timestamp. + #[must_use] + pub const fn month(&self) -> u8 { + self.month + } + + /// Returns the day component (1-31) of the timestamp. + #[must_use] + pub const fn day(&self) -> u8 { + self.day + } + + /// Returns the hour component (0-23) of the timestamp. + #[must_use] + pub const fn hour(&self) -> u8 { + self.hour + } + + /// Returns the minute component (0-59) of the timestamp. + #[must_use] + pub const fn minute(&self) -> u8 { + self.minute + } + + /// Returns the second component (0-59) of the timestamp. + #[must_use] + pub const fn second(&self) -> u8 { + self.second + } + + /// Returns the nanosecond component (0-999,999,999) of the timestamp. + /// For timestamps that don't support nanosecond precision, this returns 0. 
+ #[must_use] + pub const fn nanosecond(&self) -> u32 { + self.nanosecond + } + + /// Returns the timezone of this timestamp. + #[must_use] + pub fn timezone(&self) -> TimeZone { + TZ::timezone() + } + + /// Calculate days since Unix epoch (1970-01-01) for this date. + /// + /// Based on Howard Hinnant's `days_from_civil` algorithm: + /// + /// + /// Negative values indicate dates prior to 1970-01-01. + const fn days_from_civil(&self) -> i32 { + let (y, m) = if self.month <= 2 { + (self.year as i32 - 1, self.month as i32 + 9) + } else { + (self.year as i32, self.month as i32 - 3) + }; + + // Calculate era (400-year cycles) + let era = y / 400; + let yoe = y - era * 400; // year of era [0, 399] + + // Calculate day of year + let doy = (153 * m + 2) / 5 + self.day as i32 - 1; // day of year [0, 365] + + // Calculate day of era + let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy; // day of era [0, 146096] + + // Calculate days since epoch (era 0 starts at year 0, not 1970) + era * 146097 + doe - 719468 + } +} + +impl ZipDateTime { + /// Creates a ZipDateTime from a Unix timestamp (seconds since epoch) + pub fn from_unix(seconds: i64) -> UtcDateTime { + let (year, month, day, hour, minute, second) = unix_timestamp_to_components(seconds); + ZipDateTime { + year, + month, + day, + hour, + minute, + second, + nanosecond: 0, + _timezone: std::marker::PhantomData, + } + } + + /// Creates a ZipDateTime from an NTFS timestamp (100ns ticks since 1601) + pub(crate) fn from_ntfs(ticks: u64) -> UtcDateTime { + let unix_seconds = (ticks / 10_000_000).saturating_sub(NTFS_EPOCH_OFFSET) as i64; + let (year, month, day, hour, minute, second) = unix_timestamp_to_components(unix_seconds); + let nanosecond = ((ticks % 10_000_000) * 100) as u32; + ZipDateTime { + year, + month, + day, + hour, + minute, + second, + nanosecond, + _timezone: std::marker::PhantomData, + } + } + + /// Convert to Unix timestamp (seconds since epoch). 
+ /// + /// Returns the number of seconds since the Unix epoch (1970-01-01 00:00:00 UTC). + /// Negative values represent dates before 1970. + #[must_use] + pub fn to_unix(&self) -> i64 { + let days_since_epoch = self.days_from_civil(); + + (i64::from(days_since_epoch)) * 86400 + + (i64::from(self.hour)) * 3600 + + (i64::from(self.minute)) * 60 + + (i64::from(self.second)) + } +} + +impl ZipDateTime { + /// Creates a ZipDateTime from a DosDateTime + pub(crate) fn from_dos(dos: DosDateTime) -> LocalDateTime { + // Note: DOS timestamps with month=0 and day=0 are a gray area. Some + // seem to normalize to 1980-01-01 while others normalize to 1979-11-30. + ZipDateTime { + year: dos.year(), + month: dos.month(), + day: dos.day(), + hour: dos.hour(), + minute: dos.minute(), + second: dos.second(), + nanosecond: 0, + _timezone: std::marker::PhantomData, + } + } +} + +/// Represents an MS-DOS timestamp with 2-second precision. +/// +/// MS-DOS timestamps are stored as packed 16-bit values for date and time, +/// with a limited range from 1980 to 2107 and 2-second precision for seconds. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct DosDateTime { + time: u16, + date: u16, +} + +impl DosDateTime { + /// Creates a new MS-DOS datetime from packed date and time values. + #[must_use] + pub(crate) const fn new(time: u16, date: u16) -> Self { + Self { time, date } + } + + /// Returns the year (1980-2107). + #[must_use] + pub fn year(&self) -> u16 { + ((self.date >> 9) & 0x7f) + 1980 + } + + /// Returns the month (1-12). + #[must_use] + pub fn month(&self) -> u8 { + let raw_month = ((self.date >> 5) & 0x0f) as u8; + raw_month.clamp(1, 12) + } + + /// Returns the day of the month (1-31). + #[must_use] + pub fn day(&self) -> u8 { + let raw_day = (self.date & 0x1f) as u8; + raw_day.clamp(1, last_day_of_month(self.year(), self.month())) + } + + /// Returns the hour (0-23). 
+ #[must_use] + pub fn hour(&self) -> u8 { + let raw_hour = ((self.time >> 11) & 0x1f) as u8; + raw_hour.min(23) + } + + /// Returns the minute (0-59). + #[must_use] + pub fn minute(&self) -> u8 { + let raw_minute = ((self.time >> 5) & 0x3f) as u8; + raw_minute.min(59) + } + + /// Returns the second (0-58, always even due to 2-second precision). + #[must_use] + pub fn second(&self) -> u8 { + let raw_second = ((self.time & 0x1f) * 2) as u8; + raw_second.min(58) + } + + /// Returns the packed time and date components as (time, date). + #[must_use] + pub(crate) const fn into_parts(self) -> (u16, u16) { + (self.time, self.date) + } +} + +impl From<&ZipDateTime> for DosDateTime { + fn from(zip_dt: &ZipDateTime) -> Self { + // Saturate year to DOS range (1980-2107) + let dos_year = zip_dt.year.clamp(1980, 2107); + + // Pack the date: bits 15-9: year-1980, bits 8-5: month, bits 4-0: day + let packed_date = + ((dos_year - 1980) << 9) | ((zip_dt.month as u16) << 5) | (zip_dt.day as u16); + + // Pack the time: bits 15-11: hour, bits 10-5: minute, bits 4-0: second/2 + let packed_time = ((zip_dt.hour as u16) << 11) + | ((zip_dt.minute as u16) << 5) + | ((zip_dt.second as u16) / 2); + + Self { + time: packed_time, + date: packed_date, + } + } +} + +/// Extracts timestamp from the extra field using "last wins" strategy. +/// Returns the last valid timestamp found, or falls back to MS-DOS if none found. +/// This matches Go's zip reader behavior. 
+pub(crate) fn extract_best_timestamp( + extra_fields: ExtraFields<'_>, + dos_time: u16, + dos_date: u16, +) -> ZipDateTimeKind { + let mut last_timestamp = None; + + for (field_id, field_data) in extra_fields { + match field_id { + ExtraFieldId::NTFS => { + if let Some(timestamp) = parse_ntfs_timestamp(field_data) { + last_timestamp = Some(ZipDateTimeKind::Utc(timestamp)); + } + }, + ExtraFieldId::EXTENDED_TIMESTAMP => { + if let Some(timestamp) = parse_extended_timestamp(field_data) { + last_timestamp = Some(ZipDateTimeKind::Utc(timestamp)); + } + }, + ExtraFieldId::INFO_ZIP_UNIX_ORIGINAL => { + if let Some(timestamp) = parse_unix_timestamp(field_data) { + last_timestamp = Some(ZipDateTimeKind::Utc(timestamp)); + } + }, + _ => {}, + } + } + + // Return the last timestamp found, or fall back to MS-DOS + last_timestamp.unwrap_or_else(|| { + ZipDateTimeKind::Local(LocalDateTime::from_dos(DosDateTime::new( + dos_time, dos_date, + ))) + }) +} + +/// Parses NTFS timestamp extra field (0x000a) +fn parse_ntfs_timestamp(data: &[u8]) -> Option { + if data.len() < 32 { + return None; + } + + // NTFS extra field format: + // 4 bytes: reserved (usually 0) + // 2 bytes: attribute tag (0x0001 for timestamps) + // 2 bytes: attribute size (24 bytes for 3 timestamps) + // 8 bytes: modification time + // 8 bytes: access time + // 8 bytes: creation time + + let tag = le_u16(&data[4..6]); + if tag != 0x0001 { + return None; + } + + let size = le_u16(&data[6..8]) as usize; + if size < 24 || data.len() < 8 + size { + return None; + } + + // Extract modification time (first 8 bytes of timestamp data) + let mtime_ticks = le_u64(&data[8..16]); + Some(UtcDateTime::from_ntfs(mtime_ticks)) +} + +/// Parses Extended Timestamp extra field (0x5455) +fn parse_extended_timestamp(data: &[u8]) -> Option { + if data.len() < 5 { + return None; + } + + let flags = data[0]; + let pos = 1; + + // Check if modification time is present (bit 0) + if flags & 0x01 != 0 && pos + 4 <= data.len() { + let 
/// Convert Unix timestamp to broken down date/time components
///
/// Returns `(year, month, day, hour, minute, second)` for the given number of
/// seconds relative to 1970-01-01T00:00:00, in the proleptic Gregorian
/// calendar. Negative timestamps (dates before 1970) are supported. Years that
/// do not fit in a `u16` saturate to `0` or `u16::MAX` instead of wrapping.
///
/// Based on Howard Hinnant's date library algorithm `civil_from_days`:
/// <https://howardhinnant.github.io/date_algorithms.html#civil_from_days>
fn unix_timestamp_to_components(timestamp: i64) -> (u16, u8, u8, u8, u8, u8) {
    const SECONDS_PER_DAY: i64 = 86_400;
    const DAYS_PER_ERA: i64 = 146_097; // days in one 400-year cycle
    const DAYS_0000_03_01_TO_1970_01_01: i64 = 719_468;

    // Floor division: for negative timestamps the day must move back together
    // with the (non-negative) second-of-day. Truncating `/` and `%` would leave
    // e.g. -1 on 1970-01-01 instead of 1969-12-31.
    let days_since_epoch = timestamp.div_euclid(SECONDS_PER_DAY);
    let seconds_in_day = timestamp.rem_euclid(SECONDS_PER_DAY);

    // Convert seconds within day to H:M:S
    let hour = (seconds_in_day / 3600) as u8;
    let minute = ((seconds_in_day % 3600) / 60) as u8;
    let second = (seconds_in_day % 60) as u8;

    // Shift epoch from 1970-01-01 to 0000-03-01 so that the leap day is the
    // last day of the (shifted) year.
    let shifted_days = days_since_epoch + DAYS_0000_03_01_TO_1970_01_01;

    // Era (400-year period) and day within it [0, 146096]; floor division
    // again so that days before 0000-03-01 land in the right era.
    let era = shifted_days.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted_days.rem_euclid(DAYS_PER_ERA);

    // Year within the era [0, 399]
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;

    // Day within the shifted year [0, 365]
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);

    // Months are shifted: Mar=0, Apr=1, ..., Dec=9, Jan=10, Feb=11
    let shifted_month = (5 * day_of_year + 2) / 153;
    let day_of_month = day_of_year - (153 * shifted_month + 2) / 5 + 1;

    // Convert back to the normal calendar; January and February belong to the
    // following civil year.
    let shifted_year = era * 400 + year_of_era;
    let (year, month) = if shifted_month < 10 {
        (shifted_year, shifted_month + 3)
    } else {
        (shifted_year + 1, shifted_month - 9)
    };

    (
        year.clamp(0, i64::from(u16::MAX)) as u16,
        month as u8,
        day_of_month as u8,
        hour,
        minute,
        second,
    )
}
DosDateTime::new(dos_time, dos_date); + + assert_eq!(dos_dt_check.year(), 2023); + assert_eq!(dos_dt_check.month(), 6); + assert_eq!(dos_dt_check.day(), 15); + assert_eq!(dos_dt_check.hour(), 14); + assert_eq!(dos_dt_check.minute(), 30); + assert_eq!(dos_dt_check.second(), 44); // Rounded down to even second + } + + #[test] + fn test_zip_to_dos_year_saturation() { + // Test year before DOS range (should saturate to 1980) + let zip_dt_before = utc_from_components(1979, 6, 15, 14, 30, 45, 0); + let dos_dt: DosDateTime = (&zip_dt_before).into(); + let (dos_time, dos_date) = dos_dt.into_parts(); + let dos_dt_check = DosDateTime::new(dos_time, dos_date); + assert_eq!(dos_dt_check.year(), 1980); // Saturated to minimum + assert_eq!(dos_dt_check.month(), 6); + assert_eq!(dos_dt_check.day(), 15); + + // Test year way before DOS range + let zip_dt_way_before = utc_from_components(1800, 1, 1, 0, 0, 0, 0); + let dos_dt2: DosDateTime = (&zip_dt_way_before).into(); + let (dos_time2, dos_date2) = dos_dt2.into_parts(); + let dos_dt2_check = DosDateTime::new(dos_time2, dos_date2); + assert_eq!(dos_dt2_check.year(), 1980); // Saturated to minimum + + // Test year after DOS range (should saturate to 2107) + let zip_dt_after = utc_from_components(2108, 6, 15, 14, 30, 45, 0); + let dos_dt3: DosDateTime = (&zip_dt_after).into(); + let (dos_time3, dos_date3) = dos_dt3.into_parts(); + let dos_dt3_check = DosDateTime::new(dos_time3, dos_date3); + assert_eq!(dos_dt3_check.year(), 2107); // Saturated to maximum + assert_eq!(dos_dt3_check.month(), 6); + assert_eq!(dos_dt3_check.day(), 15); + + // Test year way after DOS range + let zip_dt_way_after = utc_from_components(3000, 12, 31, 23, 59, 59, 0); + let dos_dt4: DosDateTime = (&zip_dt_way_after).into(); + let (dos_time4, dos_date4) = dos_dt4.into_parts(); + let dos_dt4_check = DosDateTime::new(dos_time4, dos_date4); + assert_eq!(dos_dt4_check.year(), 2107); // Saturated to maximum + } + + #[test] + fn test_dos_datetime() { + // Test using 
the From trait + let zip_dt = utc_from_components(2023, 6, 15, 14, 30, 45, 0); + let dos_dt: DosDateTime = (&zip_dt).into(); + assert_eq!(dos_dt.year(), 2023); + assert_eq!(dos_dt.month(), 6); + assert_eq!(dos_dt.day(), 15); + assert_eq!(dos_dt.hour(), 14); + assert_eq!(dos_dt.minute(), 30); + assert_eq!(dos_dt.second(), 44); // Rounded down to even second + } + + #[test] + fn test_dos_datetime_odd_seconds() { + // Test that odd seconds are rounded down using the From trait + let zip_dt_odd = utc_from_components(2020, 1, 1, 12, 30, 45, 0); + let dos_dt_odd: DosDateTime = (&zip_dt_odd).into(); + assert_eq!(dos_dt_odd.second(), 44); // 45 rounded down to 44 + + let zip_dt_even = utc_from_components(2020, 1, 1, 12, 30, 46, 0); + let dos_dt_even: DosDateTime = (&zip_dt_even).into(); + assert_eq!(dos_dt_even.second(), 46); // 46 stays 46 + } + + #[test] + fn test_dos_datetime_edge_cases() { + // Test minimum date using From trait + let zip_dt_min = utc_from_components(1980, 1, 1, 0, 0, 0, 0); + let dos_dt_min: DosDateTime = (&zip_dt_min).into(); + assert_eq!(dos_dt_min.year(), 1980); + assert_eq!(dos_dt_min.month(), 1); + assert_eq!(dos_dt_min.day(), 1); + + // Test maximum date using From trait + let zip_dt_max = utc_from_components(2107, 12, 31, 23, 59, 58, 0); + let dos_dt_max: DosDateTime = (&zip_dt_max).into(); + assert_eq!(dos_dt_max.year(), 2107); + assert_eq!(dos_dt_max.month(), 12); + assert_eq!(dos_dt_max.day(), 31); + assert_eq!(dos_dt_max.hour(), 23); + assert_eq!(dos_dt_max.minute(), 59); + assert_eq!(dos_dt_max.second(), 58); + } + + #[test] + fn test_dos_datetime_zero_normalization() { + // Test that zero DOS timestamp (0x0000 0x0000) is normalized to 1980-01-01 00:00:00 + let datetime = DosDateTime::new(0x0000, 0x0000); + assert_eq!(datetime.year(), 1980); + assert_eq!(datetime.month(), 1); // month 0 normalized to 1 + assert_eq!(datetime.day(), 1); // day 0 normalized to 1 + assert_eq!(datetime.hour(), 0); + assert_eq!(datetime.minute(), 0); + 
assert_eq!(datetime.second(), 0); + + // Test partial zero normalization - only month is zero + let datetime = DosDateTime::new(0x0000, 0x0001); // day=1, month=0, year=1980 + assert_eq!(datetime.year(), 1980); + assert_eq!(datetime.month(), 1); // month 0 normalized to 1 + assert_eq!(datetime.day(), 1); + assert_eq!(datetime.hour(), 0); + assert_eq!(datetime.minute(), 0); + assert_eq!(datetime.second(), 0); + + // Test partial zero normalization - only day is zero + let datetime = DosDateTime::new(0x0000, 0x0020); // day=0, month=1, year=1980 + assert_eq!(datetime.year(), 1980); + assert_eq!(datetime.month(), 1); + assert_eq!(datetime.day(), 1); // day 0 normalized to 1 + assert_eq!(datetime.hour(), 0); + assert_eq!(datetime.minute(), 0); + assert_eq!(datetime.second(), 0); + } + + #[test] + fn test_zip_datetime_dos() { + let datetime = local_from_components(2020, 6, 15, 14, 30, 44, 0); + + assert_eq!(datetime.year(), 2020); + assert_eq!(datetime.month(), 6); + assert_eq!(datetime.day(), 15); + assert_eq!(datetime.hour(), 14); + assert_eq!(datetime.minute(), 30); + assert_eq!(datetime.second(), 44); + assert_eq!(datetime.nanosecond(), 0); + assert_eq!(datetime.timezone(), TimeZone::Local); + } + + #[test] + fn test_zip_datetime_unix() { + // Unix timestamp for 2010-09-05 02:12:01 UTC + let datetime = utc_from_components(2010, 9, 5, 2, 12, 1, 0); + + assert_eq!(datetime.year(), 2010); + assert_eq!(datetime.month(), 9); + assert_eq!(datetime.day(), 5); + assert_eq!(datetime.hour(), 2); + assert_eq!(datetime.minute(), 12); + assert_eq!(datetime.second(), 1); + assert_eq!(datetime.nanosecond(), 0); + assert_eq!(datetime.timezone(), TimeZone::Utc); + } + + #[test] + fn test_zip_datetime_ntfs() { + // NTFS timestamp for roughly 2010-09-05 02:12:01 UTC with 500ms precision + let datetime = utc_from_components(2010, 9, 5, 2, 12, 1, 500000000); + + assert_eq!(datetime.year(), 2010); + assert_eq!(datetime.month(), 9); + assert_eq!(datetime.day(), 5); + 
assert_eq!(datetime.hour(), 2); + assert_eq!(datetime.minute(), 12); + assert_eq!(datetime.second(), 1); + assert_eq!(datetime.nanosecond(), 500000000); + assert_eq!(datetime.timezone(), TimeZone::Utc); + } + + #[test] + fn test_to_unix_comprehensive() { + // Test comprehensive cases including edge cases and leap years + + // Test first day of each month in a leap year (2020) + let jan_1_2020 = utc_from_components(2020, 1, 1, 0, 0, 0, 0); + assert_eq!(jan_1_2020.to_unix(), 1577836800); + + let feb_29_2020 = utc_from_components(2020, 2, 29, 0, 0, 0, 0); + assert_eq!(feb_29_2020.to_unix(), 1582934400); + + let mar_1_2020 = utc_from_components(2020, 3, 1, 0, 0, 0, 0); + assert_eq!(mar_1_2020.to_unix(), 1583020800); + + // Test non-leap year (2021) + let feb_28_2021 = utc_from_components(2021, 2, 28, 0, 0, 0, 0); + assert_eq!(feb_28_2021.to_unix(), 1614470400); + + // Test century boundary (non-leap year despite being divisible by 4) + let mar_1_1900 = utc_from_components(1900, 3, 1, 0, 0, 0, 0); + // This is before Unix epoch, so returns negative value + let result = mar_1_1900.to_unix(); + assert!(result < 0); // Dates before epoch return negative values + + // Test year 2038 boundary (close to u32::MAX seconds) + let early_2038 = utc_from_components(2038, 1, 1, 0, 0, 0, 0); + let timestamp_2038 = early_2038.to_unix(); + assert!(timestamp_2038 > 0); // Should have a valid positive timestamp + + // Test far future dates (beyond u32 range but handled by i64) + let far_future = utc_from_components(2200, 1, 1, 0, 0, 0, 0); + let result = far_future.to_unix(); + // Should return a valid i64 timestamp for far future dates + assert!(result > u32::MAX as i64); // Should exceed u32 range + } + + #[test] + fn test_to_unix_accuracy() { + // Test known dates against their Unix timestamps (verified with Python datetime) + + // Unix epoch: 1970-01-01 00:00:00 UTC = 0 + let epoch = utc_from_components(1970, 1, 1, 0, 0, 0, 0); + assert_eq!(epoch.to_unix(), 0); + + // 2000-01-01 
00:00:00 UTC = 946684800 + let y2k = utc_from_components(2000, 1, 1, 0, 0, 0, 0); + assert_eq!(y2k.to_unix(), 946684800); + + // 2023-06-15 14:30:45 UTC = 1686839445 + let test_date = utc_from_components(2023, 6, 15, 14, 30, 45, 0); + assert_eq!(test_date.to_unix(), 1686839445); + + // Leap year test: 2020-02-29 12:00:00 UTC = 1582977600 + let leap_day = utc_from_components(2020, 2, 29, 12, 0, 0, 0); + assert_eq!(leap_day.to_unix(), 1582977600); + + // Test dates before Unix epoch return negative values + let before_epoch = utc_from_components(1969, 12, 31, 23, 59, 59, 0); + let result = before_epoch.to_unix(); + // One second before epoch should be -1 + assert_eq!(result, -1); + } + + #[test] + fn test_negative_unix_timestamps() { + // Test that negative timestamps (before 1970) work correctly + let negative_timestamp = -86400; // One day before epoch (1969-12-31) + let datetime = UtcDateTime::from_unix(negative_timestamp); + + assert_eq!(datetime.year(), 1969); + assert_eq!(datetime.month(), 12); + assert_eq!(datetime.day(), 31); + assert_eq!(datetime.hour(), 0); + assert_eq!(datetime.minute(), 0); + assert_eq!(datetime.second(), 0); + + // Round trip test + assert_eq!(datetime.to_unix(), negative_timestamp); + } + + #[test] + fn test_days_from_civil() { + // Test Unix epoch + let epoch = utc_from_components(1970, 1, 1, 0, 0, 0, 0); + assert_eq!(epoch.days_from_civil(), 0); + + // Test Y2K (verified with Python) + let y2k = utc_from_components(2000, 1, 1, 0, 0, 0, 0); + assert_eq!(y2k.days_from_civil(), 10957); + + // Test leap year boundary (verified with Python) + let leap_day = utc_from_components(2020, 2, 29, 0, 0, 0, 0); + assert_eq!(leap_day.days_from_civil(), 18321); + + // Test before epoch (negative value) + let before_epoch = utc_from_components(1969, 12, 31, 0, 0, 0, 0); + assert_eq!(before_epoch.days_from_civil(), -1); + } + + #[test] + fn test_zip_datetime_display() { + // Test with zero nanoseconds - should omit the nanosecond part + let 
datetime_no_nanos = utc_from_components(2023, 6, 15, 14, 30, 42, 0); + assert_eq!(format!("{}", datetime_no_nanos), "2023-06-15T14:30:42Z"); + + // Test with non-zero nanoseconds - should include the nanosecond part + let datetime_with_nanos = utc_from_components(2023, 6, 15, 14, 30, 42, 500000000); + assert_eq!( + format!("{}", datetime_with_nanos), + "2023-06-15T14:30:42.500000000Z" + ); + + // Test local time with zero nanoseconds + let datetime_local = local_from_components(2023, 6, 15, 14, 30, 42, 0); + assert_eq!(format!("{}", datetime_local), "2023-06-15T14:30:42"); + + // Test local time with nanoseconds + let datetime_local_nanos = local_from_components(2023, 6, 15, 14, 30, 42, 123456789); + assert_eq!( + format!("{}", datetime_local_nanos), + "2023-06-15T14:30:42.123456789" + ); + } + + #[test] + fn test_parse_extended_timestamp() { + // Extended timestamp with modification time flag and Unix timestamp + let mut data = vec![0x01]; // Flags: modification time present + data.extend_from_slice(&1283652721u32.to_le_bytes()); // Unix timestamp + + let result = parse_extended_timestamp(&data).unwrap(); + // Check that it's a Unix timestamp with the right components + assert_eq!(result.year(), 2010); + assert_eq!(result.month(), 9); + assert_eq!(result.day(), 5); + assert_eq!(result.hour(), 2); + assert_eq!(result.minute(), 12); + assert_eq!(result.second(), 1); + assert_eq!(result.timezone(), TimeZone::Utc); + } + + #[test] + fn test_parse_unix_timestamp() { + // Unix timestamp format: access time (4 bytes) + modification time (4 bytes) + let mut data = vec![]; + data.extend_from_slice(&0u32.to_le_bytes()); // Access time (ignored) + data.extend_from_slice(&1283652721u32.to_le_bytes()); // Modification time + + let result = parse_unix_timestamp(&data).unwrap(); + // Check that it's a Unix timestamp with the right components + assert_eq!(result.year(), 2010); + assert_eq!(result.month(), 9); + assert_eq!(result.day(), 5); + assert_eq!(result.hour(), 2); + 
assert_eq!(result.minute(), 12); + assert_eq!(result.second(), 1); + assert_eq!(result.timezone(), TimeZone::Utc); + } + + #[test] + fn test_parse_ntfs_timestamp() { + // NTFS timestamp format + let mut data = vec![0; 4]; // Reserved + data.extend_from_slice(&0x0001u16.to_le_bytes()); // Tag + data.extend_from_slice(&24u16.to_le_bytes()); // Size + + // NTFS timestamp (100-nanosecond ticks since 1601-01-01) + let ticks = (1283652721 + NTFS_EPOCH_OFFSET) * 10_000_000; + data.extend_from_slice(&ticks.to_le_bytes()); // Modification time + data.extend_from_slice(&0u64.to_le_bytes()); // Access time + data.extend_from_slice(&0u64.to_le_bytes()); // Creation time + + let result = parse_ntfs_timestamp(&data).unwrap(); + // Check that it's an NTFS timestamp with the right components + assert_eq!(result.year(), 2010); + assert_eq!(result.month(), 9); + assert_eq!(result.day(), 5); + assert_eq!(result.hour(), 2); + assert_eq!(result.minute(), 12); + assert_eq!(result.second(), 1); + assert_eq!(result.timezone(), TimeZone::Utc); + } + + #[test] + fn test_zip_datetime_ordering() { + let dt1 = UtcDateTime::from_components(2020, 1, 1, 0, 0, 0, 0).unwrap(); + let dt2 = UtcDateTime::from_components(2020, 1, 1, 0, 0, 0, 500_000_000).unwrap(); // Same time, more nanoseconds + let dt3 = UtcDateTime::from_components(2020, 1, 1, 0, 0, 1, 0).unwrap(); // One second later + let dt4 = UtcDateTime::from_components(2020, 1, 1, 0, 1, 0, 0).unwrap(); // One minute later + let dt5 = UtcDateTime::from_components(2020, 1, 1, 1, 0, 0, 0).unwrap(); // One hour later + let dt6 = UtcDateTime::from_components(2020, 1, 2, 0, 0, 0, 0).unwrap(); // One day later + let dt7 = UtcDateTime::from_components(2020, 2, 1, 0, 0, 0, 0).unwrap(); // One month later + let dt8 = UtcDateTime::from_components(2021, 1, 1, 0, 0, 0, 0).unwrap(); // One year later + + let mut timestamps = vec![dt8, dt3, dt1, dt6, dt4, dt2, dt7, dt5]; + timestamps.sort_unstable(); + let expected = vec![dt1, dt2, dt3, dt4, dt5, dt6, dt7, 
dt8]; + assert_eq!( + timestamps, expected, + "sorting should produce chronological order" + ); + } +} + +#[cfg(test)] +mod property_tests { + //! Property-based tests to verify timestamp conversion accuracy against jiff. + + use super::*; + use quickcheck_macros::quickcheck; + + #[quickcheck] + fn prop_unix_timestamp_conversion(unix_seconds: u32) { + let zip_datetime = UtcDateTime::from_unix(i64::from(unix_seconds)); + + let Ok(timestamp) = jiff::Timestamp::from_second(unix_seconds as i64) else { + return; + }; + + let dt = timestamp.to_zoned(jiff::tz::TimeZone::UTC); + + assert_eq!(zip_datetime.year(), dt.year() as u16, "year"); + assert_eq!(zip_datetime.month(), dt.month() as u8, "month"); + assert_eq!(zip_datetime.day(), dt.day() as u8, "day"); + assert_eq!(zip_datetime.hour(), dt.hour() as u8, "hour"); + assert_eq!(zip_datetime.minute(), dt.minute() as u8, "minute"); + assert_eq!(zip_datetime.second(), dt.second() as u8, "second"); + assert_eq!(zip_datetime.timezone(), TimeZone::Utc); + assert_eq!(zip_datetime.nanosecond(), 0, "nanosecond"); + + assert_eq!( + zip_datetime.to_unix(), + i64::from(unix_seconds), + "to_unix should match input" + ); + } + + /// Property test: NTFS timestamp conversion should match jiff's conversion + #[quickcheck] + fn prop_ntfs_timestamp_conversion(ntfs_ticks: u64) { + let zip_datetime = UtcDateTime::from_ntfs(ntfs_ticks); + + // Convert NTFS ticks to Unix timestamp for jiff + // NTFS ticks are 100-nanosecond intervals since 1601-01-01 + let unix_seconds = (ntfs_ticks / 10_000_000).saturating_sub(NTFS_EPOCH_OFFSET); + let nanoseconds = ((ntfs_ticks % 10_000_000) * 100) as u32; + + if unix_seconds > u32::MAX as u64 { + return; + } + + let Ok(jiff_timestamp) = jiff::Timestamp::new(unix_seconds as i64, nanoseconds as i32) + else { + return; + }; + + let dt = jiff_timestamp.to_zoned(jiff::tz::TimeZone::UTC); + + assert_eq!(zip_datetime.year(), dt.year() as u16, "year"); + assert_eq!(zip_datetime.month(), dt.month() as u8, "month"); + 
assert_eq!(zip_datetime.day(), dt.day() as u8, "day"); + assert_eq!(zip_datetime.hour(), dt.hour() as u8, "hour"); + assert_eq!(zip_datetime.minute(), dt.minute() as u8, "minute"); + assert_eq!(zip_datetime.second(), dt.second() as u8, "second"); + assert_eq!(zip_datetime.timezone(), TimeZone::Utc); + assert_eq!(zip_datetime.nanosecond(), nanoseconds, "nanosecond"); + } + + /// Property test: DOS timestamp conversion should always produce valid jiff datetimes + #[quickcheck] + fn prop_dos_timestamp_always_valid(dos_time: u16, dos_date: u16) { + let dos_datetime = DosDateTime::new(dos_time, dos_date); + let zip_datetime = LocalDateTime::from_dos(dos_datetime); + + // Create jiff datetime - this should never fail with our normalization + let dt = jiff::civil::DateTime::new( + zip_datetime.year() as i16, + zip_datetime.month() as i8, + zip_datetime.day() as i8, + zip_datetime.hour() as i8, + zip_datetime.minute() as i8, + zip_datetime.second() as i8, + 0, // nanosecond + ) + .unwrap(); + + // Verify the components match what we expect + assert_eq!(zip_datetime.year(), dt.year() as u16, "year"); + assert_eq!(zip_datetime.month(), dt.month() as u8, "month"); + assert_eq!(zip_datetime.day(), dt.day() as u8, "day"); + assert_eq!(zip_datetime.hour(), dt.hour() as u8, "hour"); + assert_eq!(zip_datetime.minute(), dt.minute() as u8, "minute"); + assert_eq!(zip_datetime.second(), dt.second() as u8, "second"); + } +} diff --git a/crates/soapberry-zip/src/utils.rs b/crates/soapberry-zip/src/utils.rs new file mode 100644 index 0000000..4521b85 --- /dev/null +++ b/crates/soapberry-zip/src/utils.rs @@ -0,0 +1,14 @@ +#[inline(always)] +pub(crate) fn le_u64(d: &[u8]) -> u64 { + u64::from_le_bytes([d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7]]) +} + +#[inline(always)] +pub(crate) fn le_u32(d: &[u8]) -> u32 { + u32::from_le_bytes([d[0], d[1], d[2], d[3]]) +} + +#[inline(always)] +pub(crate) fn le_u16(d: &[u8]) -> u16 { + u16::from_le_bytes([d[0], d[1]]) +} diff --git 
a/crates/soapberry-zip/src/writer.rs b/crates/soapberry-zip/src/writer.rs new file mode 100644 index 0000000..f72fcd2 --- /dev/null +++ b/crates/soapberry-zip/src/writer.rs @@ -0,0 +1,1516 @@ +use crate::{ + CENTRAL_HEADER_SIGNATURE, CompressionMethod, DataDescriptor, + END_OF_CENTRAL_DIR_LOCATOR_SIGNATURE, END_OF_CENTRAL_DIR_SIGNATURE64, + END_OF_CENTRAL_DIR_SIGNAUTRE_BYTES, Error, Header, ZipFileHeaderFixed, ZipLocalFileHeaderFixed, + crc, + errors::ErrorKind, + extra_fields::{ExtraFieldId, ExtraFieldsContainer}, + mode::CREATOR_UNIX, + path::{NormalizedPath, ZipFilePath}, + time::{DosDateTime, UtcDateTime}, +}; +use std::io::{self, Write}; + +// ZIP64 constants +const ZIP64_VERSION_NEEDED: u16 = 45; // 4.5 +const ZIP64_EOCD_SIZE: usize = 56; + +// General purpose bit flags +const FLAG_DATA_DESCRIPTOR: u16 = 0x08; // bit 3: data descriptor present +const FLAG_UTF8_ENCODING: u16 = 0x800; // bit 11: UTF-8 encoding flag (EFS) + +// ZIP64 thresholds - when to switch to ZIP64 format +const ZIP64_THRESHOLD_FILE_SIZE: u64 = u32::MAX as u64; +const ZIP64_THRESHOLD_OFFSET: u64 = u32::MAX as u64; +const ZIP64_THRESHOLD_ENTRIES: usize = u16::MAX as usize; + +#[derive(Debug)] +struct CountWriter { + writer: W, + count: u64, +} + +impl CountWriter { + fn new(writer: W, count: u64) -> Self { + CountWriter { writer, count } + } + + fn count(&self) -> u64 { + self.count + } +} + +impl Write for CountWriter { + fn write(&mut self, buf: &[u8]) -> io::Result { + let bytes_written = self.writer.write(buf)?; + self.count += bytes_written as u64; + Ok(bytes_written) + } + + fn flush(&mut self) -> io::Result<()> { + self.writer.flush() + } +} + +/// Builds a `ZipArchiveWriter`. +#[derive(Debug, Default)] +pub struct ZipArchiveWriterBuilder { + count: u64, + capacity: usize, +} + +impl ZipArchiveWriterBuilder { + /// Creates a new `ZipArchiveWriterBuilder`. + pub fn new() -> Self { + Self::default() + } + + /// Sets the anticipated number of files to optimize memory allocation. 
+ pub fn with_capacity(mut self, capacity: usize) -> Self { + self.capacity = capacity; + self + } + + /// Sets the starting offset for writing. Useful when there is prelude data + /// prior to the zip archive. + /// + /// When there is prelude data, setting the offset may not technically be + /// required, but it is recommended. For standard zip files, many zip + /// readers can self correct when the prelude data isn't properly declared. + /// However for zip64 archives, setting the correct offset is required. + /// + /// # Example: Appending ZIP to existing data + /// ```rust + /// use std::io::{Cursor, Write, Seek, SeekFrom}; + /// + /// // Create a file with some prefix data + /// let mut output = Cursor::new(Vec::new()); + /// output.write_all(b"This is a custom header or prefix data\n").unwrap(); + /// let zip_start_offset = output.position(); + /// + /// // Create ZIP archive starting after the prefix data + /// let mut archive = soapberry_zip::ZipArchiveWriter::builder() + /// .with_offset(zip_start_offset) // Tell the archive where it starts + /// .build(&mut output); + /// + /// // Add files normally + /// let mut file = archive.new_file("data.txt").create().unwrap(); + /// let mut writer = soapberry_zip::ZipDataWriter::new(&mut file); + /// writer.write_all(b"File content").unwrap(); + /// let (_, desc) = writer.finish().unwrap(); + /// file.finish(desc).unwrap(); + /// archive.finish().unwrap(); + /// + /// // The resulting file contains both prefix data and the ZIP archive + /// let final_data = output.into_inner(); + /// assert!(final_data.starts_with(b"This is a custom header")); + /// ``` + pub fn with_offset(mut self, offset: u64) -> Self { + self.count = offset; + self + } + + /// Builds a `ZipArchiveWriter` that writes to `writer`. 
+ pub fn build(&self, writer: W) -> ZipArchiveWriter { + ZipArchiveWriter { + writer: CountWriter::new(writer, self.count), + files: Vec::with_capacity(self.capacity), + file_names: Vec::new(), + } + } +} + +/// Create a new Zip archive. +/// +/// Basic usage: +/// ```rust +/// use std::io::Write; +/// +/// let mut output = std::io::Cursor::new(Vec::new()); +/// let mut archive = soapberry_zip::ZipArchiveWriter::new(&mut output); +/// let (mut entry, config) = archive.new_file("file.txt").start().unwrap(); +/// let mut writer = config.wrap(&mut entry); +/// writer.write_all(b"Hello, world!").unwrap(); +/// let (_, output) = writer.finish().unwrap(); +/// entry.finish(output).unwrap(); +/// archive.finish().unwrap(); +/// ``` +/// +/// Use the builder for customization: +/// ```rust +/// use std::io::Write; +/// +/// let mut output = std::io::Cursor::new(Vec::::new()); +/// let mut _archive = soapberry_zip::ZipArchiveWriter::builder() +/// .with_capacity(1000) // Optimize for 1000 anticipated files +/// .build(&mut output); +/// // ... add files as usual +/// ``` +#[derive(Debug)] +pub struct ZipArchiveWriter { + files: Vec, + file_names: Vec, + writer: CountWriter, +} + +impl ZipArchiveWriter<()> { + /// Creates a `ZipArchiveWriterBuilder` for configuring the writer. + pub fn builder() -> ZipArchiveWriterBuilder { + ZipArchiveWriterBuilder::new() + } +} + +impl ZipArchiveWriter { + /// Creates a new `ZipArchiveWriter` that writes to `writer`. + pub fn new(writer: W) -> Self { + ZipArchiveWriterBuilder::new().build(writer) + } + + /// Returns the current offset in the output stream. + /// + /// Analagous to [`std::io::Cursor::position`]. 
+ /// + /// This can be used to determine various offsets during ZIP archive + /// creation: + /// + /// - Local header offset + /// - Start of compressed data offset + /// - End of compressed data offset + /// - End of data descriptor offset / next file's local header offset + /// + /// # Example + /// + /// ```rust + /// use std::io::Write; + /// + /// let mut output = std::io::Cursor::new(Vec::new()); + /// let mut archive = soapberry_zip::ZipArchiveWriter::new(&mut output); + /// + /// // 1. Get local header offset + /// let local_header_offset = archive.stream_offset(); + /// let mut file = archive.new_file("test.txt").create().unwrap(); + /// + /// // 2. Get start of data offset + /// let data_start_offset = file.stream_offset(); + /// + /// // Write some data + /// let mut writer = soapberry_zip::ZipDataWriter::new(&mut file); + /// writer.write_all(b"Hello World").unwrap(); + /// let (_, desc) = writer.finish().unwrap(); + /// + /// // 3. Get end of compressed data offset + /// let end_data_offset = file.stream_offset(); + /// + /// let compressed_bytes = file.finish(desc).unwrap(); + /// + /// // 4. Get end of data descriptor offset (next file's local header offset) + /// let end_descriptor_offset = archive.stream_offset(); + /// + /// archive.finish().unwrap(); + /// + /// assert_eq!(local_header_offset, 0); + /// assert!(data_start_offset > local_header_offset); + /// assert_eq!(end_data_offset, data_start_offset + b"Hello World".len() as u64); + /// assert_eq!(end_descriptor_offset, end_data_offset + 16); // 16 bytes for data descriptor + /// assert_eq!(compressed_bytes, end_data_offset - data_start_offset); + /// ``` + pub fn stream_offset(&self) -> u64 { + self.writer.count() + } +} + +/// Options for CRC32 calculation in ZIP files. +#[derive(Debug, Clone, Copy, Default)] +pub enum Crc32Option { + /// Calculate CRC32 automatically from the data. + #[default] + Calculate, + /// Use a custom CRC32 value and skip calculation. 
+ Custom(u32), + /// Skip CRC32 calculation entirely (sets CRC32 to 0). + Skip, +} + +impl Crc32Option { + /// Returns the initial CRC32 value for this option. + #[inline] + pub fn initial_value(&self) -> u32 { + match self { + Crc32Option::Calculate => 0, + Crc32Option::Custom(value) => *value, + Crc32Option::Skip => 0, + } + } +} + +/// A builder for creating a new file entry in a ZIP archive. +#[derive(Debug)] +pub struct ZipFileBuilder<'archive, 'name, W> { + archive: &'archive mut ZipArchiveWriter, + name: &'name str, + compression_method: CompressionMethod, + modification_time: Option, + unix_permissions: Option, + extra_fields: ExtraFieldsContainer, + crc32_option: Crc32Option, +} + +impl<'archive, W> ZipFileBuilder<'archive, '_, W> +where + W: Write, +{ + /// Sets the compression method for the file entry. + #[must_use] + #[inline] + pub fn compression_method(mut self, compression_method: CompressionMethod) -> Self { + self.compression_method = compression_method; + self + } + + /// Sets the modification time for the file entry. + /// + /// Only accepts UTC timestamps to ensure Extended Timestamp fields are written correctly. + #[must_use] + #[inline] + pub fn last_modified(mut self, modification_time: UtcDateTime) -> Self { + self.modification_time = Some(modification_time); + self + } + + /// Sets the Unix permissions for the file entry. + /// + /// Accepts either: + /// - Basic permission bits (e.g., 0o644 for rw-r--r--, 0o755 for rwxr-xr-x) + /// - Full Unix mode including file type (e.g., 0o100644 for regular file, 0o040755 for directory) + /// - Special permission bits are preserved (SUID: 0o4000, SGID: 0o2000, sticky: 0o1000) + /// + /// When set, the archive will be created with Unix-compatible "version made by" field + /// to ensure proper interpretation of the permissions by zip readers. 
+ #[must_use] + #[inline] + pub fn unix_permissions(mut self, permissions: u32) -> Self { + self.unix_permissions = Some(permissions); + self + } + + /// Adds an extra field to this file entry. + /// + /// Extra fields contain additional metadata about files in ZIP archives, + /// such as timestamps, alignment information, and platform-specific data. + /// + /// No deduplication is performed - duplicate field IDs will result in + /// multiple entries + /// + /// Will return an error if the total size exceeds 65,535 bytes for the + /// specified headers. + /// + /// Rawzip will automatically add extra fields: + /// + /// - `EXTENDED_TIMESTAMP` when `last_modified()` is set + /// - `ZIP64` when 32-bit thresholds are met + /// + /// # Examples + /// + /// Create files with different extra field headers and verify the + /// behavior. Only the central directory is checked. To check the local + /// extra fields, see + /// [`ZipEntry::local_header`](crate::ZipEntry::local_header) + /// + /// ```rust + /// # use std::io::{Cursor, Write}; + /// # use soapberry_zip::{ZipArchive, ZipArchiveWriter, ZipDataWriter, extra_fields::ExtraFieldId, Header}; + /// let mut output = Cursor::new(Vec::new()); + /// let mut archive = ZipArchiveWriter::new(&mut output); + /// + /// let my_custom_field = ExtraFieldId::new(0x6666); + /// + /// // File with extra fields only in the local file header + /// let mut local_file = archive.new_file("video.mp4") + /// .extra_field(my_custom_field, b"field1", Header::LOCAL)? + /// .create()?; + /// let mut writer = ZipDataWriter::new(&mut local_file); + /// writer.write_all(b"video data")?; + /// let (_, desc) = writer.finish()?; + /// local_file.finish(desc)?; + /// + /// // File with extra fields only in the central directory + /// let mut central_file = archive.new_file("document.pdf") + /// .extra_field(my_custom_field, b"field2", Header::CENTRAL)? 
+ /// .create()?; + /// let mut writer = ZipDataWriter::new(&mut central_file); + /// writer.write_all(b"PDF content")?; + /// let (_, desc) = writer.finish()?; + /// central_file.finish(desc)?; + /// + /// // File with extra fields in both headers for maximum compatibility + /// assert_eq!(Header::default(), Header::LOCAL | Header::CENTRAL); + /// let mut both_file = archive.new_file("important.dat") + /// .extra_field(my_custom_field, b"field3", Header::default())? + /// .create()?; + /// let mut writer = ZipDataWriter::new(&mut both_file); + /// writer.write_all(b"important data")?; + /// let (_, desc) = writer.finish()?; + /// both_file.finish(desc)?; + /// + /// archive.finish()?; + /// + /// // Verify the behavior when reading back the central directory + /// let zip_data = output.into_inner(); + /// let archive = ZipArchive::from_slice(&zip_data)?; + /// + /// for entry_result in archive.entries() { + /// let entry = entry_result?; + /// + /// // Find our custom field in the central directory + /// let custom_field_data = entry.extra_fields() + /// .find(|(id, _)| *id == my_custom_field) + /// .map(|(_, data)| data); + /// + /// match entry.file_path().as_ref() { + /// b"video.mp4" => { + /// // local only field should not be in central directory + /// assert_eq!(custom_field_data, None); + /// } + /// b"document.pdf" => { + /// // central only field should be in central directory + /// assert_eq!(custom_field_data, Some(b"field2".as_slice())); + /// } + /// b"important.dat" => { + /// // both location field should be in central directory + /// assert_eq!(custom_field_data, Some(b"field3".as_slice())); + /// } + /// _ => {} + /// } + /// } + /// # Ok::<(), Box>(()) + /// ``` + pub fn extra_field( + mut self, + id: ExtraFieldId, + data: &[u8], + location: Header, + ) -> Result { + self.extra_fields.add_field(id, data, location)?; + Ok(self) + } + + /// Sets the CRC32 calculation option for the file entry. 
+ /// + /// By default, CRC32 is calculated automatically from the data. Use this + /// method to: + /// + /// - Skip CRC32 calculation entirely (for performance or when verification + /// isn't desired) + /// - Provide a pre-calculated CRC32 value + #[must_use] + #[inline] + pub fn crc32(mut self, crc32_option: Crc32Option) -> Self { + self.crc32_option = crc32_option; + self + } + + /// Creates the file entry and returns a writer for the file's content. + #[deprecated( + since = "0.4.0", + note = "Use `start()` method instead as it allows for more flexibility (ie: CRC configuration)" + )] + pub fn create(self) -> Result, Error> { + let (entry_writer, _) = self.start()?; + Ok(entry_writer) + } + + /// Mark the start of file data + /// + /// Returns a tuple: + /// + /// - `entry` handles the ZIP format and writes compressed data to the archive + /// - `config` constructs data writers that handle uncompressed data and CRC32 calculation + /// + /// # Examples + /// + /// For stored (uncompressed) files: + /// ``` + /// # use std::io::Write; + /// # let mut output = std::io::Cursor::new(Vec::new()); + /// # let mut archive = soapberry_zip::ZipArchiveWriter::new(&mut output); + /// let (mut entry, config) = archive.new_file("file.txt").start().unwrap(); + /// let mut writer = config.wrap(&mut entry); + /// writer.write_all(b"Hello").unwrap(); + /// let (_, output) = writer.finish().unwrap(); + /// entry.finish(output).unwrap(); + /// # archive.finish().unwrap(); + /// ``` + /// + /// For deflate compression: + /// ``` + /// # use std::io::Write; + /// # let mut output = std::io::Cursor::new(Vec::new()); + /// # let mut archive = soapberry_zip::ZipArchiveWriter::new(&mut output); + /// let (mut entry, config) = archive.new_file("file.txt").start().unwrap(); + /// let encoder = flate2::write::DeflateEncoder::new(&mut entry, flate2::Compression::default()); + /// let mut writer = config.wrap(encoder); + /// writer.write_all(b"Hello").unwrap(); + /// let (encoder, output) 
= writer.finish().unwrap(); + /// encoder.finish().unwrap(); + /// entry.finish(output).unwrap(); + /// # archive.finish().unwrap(); + /// ``` + pub fn start(self) -> Result<(ZipEntryWriter<'archive, W>, ZipDataWriterConfig), Error> { + let crc32_option = self.crc32_option; + let options = ZipEntryOptions { + compression_method: self.compression_method, + modification_time: self.modification_time, + unix_permissions: self.unix_permissions, + extra_fields: self.extra_fields, + }; + let entry_writer = self.archive.new_file_with_options(self.name, options)?; + + let data_writer_config = ZipDataWriterConfig { crc32_option }; + + Ok((entry_writer, data_writer_config)) + } +} + +/// A builder for creating a new directory entry in a ZIP archive. +#[derive(Debug)] +pub struct ZipDirBuilder<'a, W> { + archive: &'a mut ZipArchiveWriter, + name: &'a str, + modification_time: Option, + unix_permissions: Option, + extra_fields: ExtraFieldsContainer, +} + +impl ZipDirBuilder<'_, W> +where + W: Write, +{ + /// Sets the modification time for the directory entry. + /// + /// See [`ZipFileBuilder::last_modified`] for details. + #[must_use] + #[inline] + pub fn last_modified(mut self, modification_time: UtcDateTime) -> Self { + self.modification_time = Some(modification_time); + self + } + + /// Sets the Unix permissions for the directory entry. + /// + /// See [`ZipFileBuilder::unix_permissions`] for details. + #[must_use] + #[inline] + pub fn unix_permissions(mut self, permissions: u32) -> Self { + self.unix_permissions = Some(permissions); + self + } + + /// Adds an extra field to this directory entry. + /// + /// See [`ZipFileBuilder::extra_field`] for details and examples. + /// The same behavior notes apply: append-only, no deduplication, and automatic fields. + pub fn extra_field( + mut self, + id: ExtraFieldId, + data: &[u8], + location: Header, + ) -> Result { + self.extra_fields.add_field(id, data, location)?; + Ok(self) + } + + /// Creates the directory entry. 
+ pub fn create(self) -> Result<(), Error> { + let options = ZipEntryOptions { + compression_method: CompressionMethod::Store, // Directories always use Store + modification_time: self.modification_time, + unix_permissions: self.unix_permissions, + extra_fields: self.extra_fields, + }; + self.archive.new_dir_with_options(self.name, options) + } +} + +impl ZipArchiveWriter +where + W: Write, +{ + pub fn write_stored_file(&mut self, name: &str, data: &[u8]) -> Result<(), Error> { + let file_path = ZipFilePath::from_str(name.trim_end_matches('/')); + + if file_path.len() > u16::MAX as usize { + return Err(Error::from(ErrorKind::InvalidInput { + msg: "file name too long".to_string(), + })); + } + + let local_header_offset = self.writer.count(); + let mut flags = 0u16; + if file_path.needs_utf8_encoding() { + flags |= FLAG_UTF8_ENCODING; + } + + let name_bytes = file_path.as_ref().as_bytes(); + let name_len = name_bytes.len() as u16; + self.file_names.extend_from_slice(name_bytes); + + let crc32 = crc::crc32(data); + let size_u64 = data.len() as u64; + if size_u64 >= ZIP64_THRESHOLD_FILE_SIZE { + return Err(Error::from(ErrorKind::InvalidInput { + msg: "stored file too large".to_string(), + })); + } + + let header = ZipLocalFileHeaderFixed { + signature: ZipLocalFileHeaderFixed::SIGNATURE, + version_needed: 20, + flags, + compression_method: CompressionMethod::Store.as_id(), + last_mod_time: 0, + last_mod_date: 0, + crc32, + compressed_size: size_u64 as u32, + uncompressed_size: size_u64 as u32, + file_name_len: file_path.len() as u16, + extra_field_len: 0, + }; + + header.write(&mut self.writer)?; + self.writer.write_all(file_path.as_ref().as_bytes())?; + self.writer.write_all(data)?; + + let mut file_header = FileHeader { + name_len, + compression_method: CompressionMethod::Store, + local_header_offset, + compressed_size: size_u64, + uncompressed_size: size_u64, + crc: crc32, + flags, + modification_time: None, + unix_permissions: None, + extra_fields: 
ExtraFieldsContainer::new(), + }; + file_header.finalize_extra_fields()?; + self.files.push(file_header); + + Ok(()) + } + + /// Writes a local file header with filtered extra fields. + fn write_local_header( + &mut self, + file_path: &ZipFilePath, + flags: u16, + compression_method: CompressionMethod, + options: &mut ZipEntryOptions, + ) -> Result<(), Error> { + // Get DOS timestamp from options or use 0 as default + let (dos_time, dos_date) = options + .modification_time + .as_ref() + .map(|dt| DosDateTime::from(dt).into_parts()) + .unwrap_or((0, 0)); + + if let Some(datetime) = options.modification_time.as_ref() { + let unix_time = datetime.to_unix().max(0) as u32; + let mut data = [0u8; 5]; + data[0] = 1; // Flags: modification time present + data[1..].copy_from_slice(&unix_time.to_le_bytes()); + options.extra_fields.add_field( + ExtraFieldId::EXTENDED_TIMESTAMP, + &data, + Header::CENTRAL, + )?; + } + + let header = ZipLocalFileHeaderFixed { + signature: ZipLocalFileHeaderFixed::SIGNATURE, + version_needed: 20, + flags, + compression_method: compression_method.as_id(), + last_mod_time: dos_time, + last_mod_date: dos_date, + crc32: 0, // must be zero if data descriptor is used (4.4.4) + compressed_size: 0, + uncompressed_size: 0, + file_name_len: file_path.len() as u16, + extra_field_len: options.extra_fields.local_size, + }; + + header.write(&mut self.writer)?; + self.writer.write_all(file_path.as_ref().as_bytes())?; + options + .extra_fields + .write_extra_fields(&mut self.writer, Header::LOCAL)?; + Ok(()) + } + + /// Creates a builder for adding a new directory to the archive. + /// + /// The name of the directory must end with a `/`. 
+ /// + /// # Example + /// + /// ```rust + /// # use std::io::Cursor; + /// # let mut output = Cursor::new(Vec::new()); + /// # let mut archive = soapberry_zip::ZipArchiveWriter::new(&mut output); + /// archive.new_dir("my-dir/") + /// .unix_permissions(0o755) + /// .create()?; + /// # Ok::<(), Box>(()) + /// ``` + #[must_use] + pub fn new_dir<'a>(&'a mut self, name: &'a str) -> ZipDirBuilder<'a, W> { + ZipDirBuilder { + archive: self, + name, + modification_time: None, + unix_permissions: None, + extra_fields: ExtraFieldsContainer::new(), + } + } + + /// Adds a new directory to the archive with options (internal method). + /// + /// The name of the directory must end with a `/`. + fn new_dir_with_options( + &mut self, + name: &str, + mut options: ZipEntryOptions, + ) -> Result<(), Error> { + let file_path = ZipFilePath::from_str(name); + if !file_path.is_dir() { + return Err(Error::from(ErrorKind::InvalidInput { + msg: "not a directory".to_string(), + })); + } + + if file_path.len() > u16::MAX as usize { + return Err(Error::from(ErrorKind::InvalidInput { + msg: "directory name too long".to_string(), + })); + } + + let local_header_offset = self.writer.count(); + let mut flags = 0u16; + if file_path.needs_utf8_encoding() { + flags |= FLAG_UTF8_ENCODING; + } else { + flags &= !FLAG_UTF8_ENCODING; + } + + // Store the name bytes in the central buffer + let name_bytes = file_path.as_ref().as_bytes(); + let name_len = name_bytes.len() as u16; + self.file_names.extend_from_slice(name_bytes); + + self.write_local_header(&file_path, flags, CompressionMethod::Store, &mut options)?; + + let file_header = FileHeader { + name_len, + compression_method: CompressionMethod::Store, + local_header_offset, + compressed_size: 0, + uncompressed_size: 0, + crc: 0, + flags, + modification_time: options.modification_time, + unix_permissions: options.unix_permissions, + extra_fields: options.extra_fields, + }; + self.files.push(file_header); + + Ok(()) + } + + /// Creates a builder for 
adding a new file to the archive. + /// + /// # Example + /// + /// ```rust + /// # use std::io::{Cursor, Write}; + /// # let mut output = Cursor::new(Vec::new()); + /// # let mut archive = soapberry_zip::ZipArchiveWriter::new(&mut output); + /// let (mut entry, config) = archive.new_file("my-file") + /// .compression_method(soapberry_zip::CompressionMethod::Deflate) + /// .unix_permissions(0o644) + /// .start()?; + /// let mut writer = config.wrap(&mut entry); + /// writer.write_all(b"Hello, world!")?; + /// let (_, output) = writer.finish()?; + /// entry.finish(output)?; + /// # Ok::<(), Box>(()) + /// ``` + #[must_use] + pub fn new_file<'name>(&mut self, name: &'name str) -> ZipFileBuilder<'_, 'name, W> { + ZipFileBuilder { + archive: self, + name, + compression_method: CompressionMethod::Store, + modification_time: None, + unix_permissions: None, + extra_fields: ExtraFieldsContainer::new(), + crc32_option: Crc32Option::default(), + } + } + + /// Adds a new file to the archive with options (internal method). 
+ fn new_file_with_options( + &mut self, + name: &str, + mut options: ZipEntryOptions, + ) -> Result, Error> { + let file_path = ZipFilePath::from_str(name.trim_end_matches('/')); + + if file_path.len() > u16::MAX as usize { + return Err(Error::from(ErrorKind::InvalidInput { + msg: "file name too long".to_string(), + })); + } + + let local_header_offset = self.writer.count(); + let mut flags = FLAG_DATA_DESCRIPTOR; + if file_path.needs_utf8_encoding() { + flags |= FLAG_UTF8_ENCODING; + } else { + flags &= !FLAG_UTF8_ENCODING; + } + + // Store the name bytes in the central buffer + let name_bytes = file_path.as_ref().as_bytes(); + let name_len = name_bytes.len() as u16; + self.file_names.extend_from_slice(name_bytes); + + self.write_local_header(&file_path, flags, options.compression_method, &mut options)?; + + Ok(ZipEntryWriter { + inner: self, + compressed_bytes: 0, + name_len, + local_header_offset, + compression_method: options.compression_method, + flags, + modification_time: options.modification_time, + unix_permissions: options.unix_permissions, + extra_fields: options.extra_fields, + }) + } + + /// Finishes writing the archive and returns the underlying writer. + /// + /// This writes the central directory and the end of central directory + /// record. ZIP64 format is used automatically when thresholds are exceeded. 
+ pub fn finish(mut self) -> Result + where + W: Write, + { + let central_directory_offset = self.writer.count(); + let total_entries = self.files.len(); + + // Determine if we need ZIP64 format + let needs_zip64 = total_entries >= ZIP64_THRESHOLD_ENTRIES + || central_directory_offset >= ZIP64_THRESHOLD_OFFSET + || self.files.iter().any(|f| f.needs_zip64()); + + let mut name_offset = 0; + + // Write central directory entries + for file in &self.files { + // Version made by and version needed to extract + let version_needed = if file.needs_zip64() { + ZIP64_VERSION_NEEDED + } else { + 20 + }; + + // Set version_made_by to indicate Unix when Unix permissions are present + let version_made_by_hi = file.unix_permissions.map(|_| CREATOR_UNIX).unwrap_or(0); + let version_made_by = (version_made_by_hi << 8) | version_needed; + + let (dos_time, dos_date) = file + .modification_time + .as_ref() + .map(|dt| DosDateTime::from(dt).into_parts()) + .unwrap_or((0, 0)); + + let header = ZipFileHeaderFixed { + signature: CENTRAL_HEADER_SIGNATURE, + version_made_by, + version_needed, + flags: file.flags, + compression_method: file.compression_method.as_id(), + last_mod_time: dos_time, + last_mod_date: dos_date, + crc32: file.crc, + compressed_size: file.compressed_size.min(ZIP64_THRESHOLD_FILE_SIZE) as u32, + uncompressed_size: file.uncompressed_size.min(ZIP64_THRESHOLD_FILE_SIZE) as u32, + file_name_len: file.name_len, + extra_field_len: file.extra_fields.central_size, + file_comment_len: 0, + disk_number_start: 0, + internal_file_attrs: 0, + external_file_attrs: file.unix_permissions.map(|x| x << 16).unwrap_or(0), + local_header_offset: file.local_header_offset.min(ZIP64_THRESHOLD_OFFSET) as u32, + }; + + header.write(&mut self.writer)?; + + // File name + let new_name_offset = name_offset + file.name_len as usize; + self.writer + .write_all(&self.file_names[name_offset..new_name_offset])?; + name_offset = new_name_offset; + + // Extra fields + file.extra_fields + 
.write_extra_fields(&mut self.writer, Header::CENTRAL)?; + } + + let central_directory_end = self.writer.count(); + let central_directory_size = central_directory_end - central_directory_offset; + + // Write ZIP64 structures if needed + if needs_zip64 { + let zip64_eocd_offset = self.writer.count(); + + // Write ZIP64 End of Central Directory Record + write_zip64_eocd( + &mut self.writer, + total_entries as u64, + central_directory_size, + central_directory_offset, + )?; + + // Write ZIP64 End of Central Directory Locator + write_zip64_eocd_locator(&mut self.writer, zip64_eocd_offset)?; + } + + // Write regular End of Central Directory Record + self.writer.write_all(&END_OF_CENTRAL_DIR_SIGNAUTRE_BYTES)?; + + // Disk numbers + self.writer.write_all(&[0u8; 4])?; + + // Number of entries - use 0xFFFF if ZIP64 + let entries_count = total_entries.min(ZIP64_THRESHOLD_ENTRIES) as u16; + self.writer.write_all(&entries_count.to_le_bytes())?; + self.writer.write_all(&entries_count.to_le_bytes())?; + + // Central directory size - use 0xFFFFFFFF if ZIP64 + let cd_size = central_directory_size.min(ZIP64_THRESHOLD_OFFSET) as u32; + self.writer.write_all(&cd_size.to_le_bytes())?; + + // Central directory offset - use 0xFFFFFFFF if ZIP64 + let cd_offset = central_directory_offset.min(ZIP64_THRESHOLD_OFFSET) as u32; + self.writer.write_all(&cd_offset.to_le_bytes())?; + + // Comment length + self.writer.write_all(&0u16.to_le_bytes())?; + + self.writer.flush()?; + Ok(self.writer.writer) + } +} + +/// A writer for a file in a ZIP archive. +/// +/// This writer is created by `ZipArchiveWriter::new_file`. +/// Data written to this writer is compressed and written to the underlying archive. +/// +/// After writing all data, call `finish` to complete the entry. 
+#[derive(Debug)] +pub struct ZipEntryWriter<'a, W> { + inner: &'a mut ZipArchiveWriter, + compressed_bytes: u64, + name_len: u16, + local_header_offset: u64, + compression_method: CompressionMethod, + flags: u16, + modification_time: Option, + unix_permissions: Option, + extra_fields: ExtraFieldsContainer, +} + +/// Configuration for creating data writers that handle uncompressed data and CRC32 calculation. +#[derive(Debug)] +pub struct ZipDataWriterConfig { + crc32_option: Crc32Option, +} + +impl ZipDataWriterConfig { + /// Wraps an encoder with a data writer configured with this builder's options. + pub fn wrap(self, encoder: E) -> ZipDataWriter { + ZipDataWriter::with_crc32(encoder, self.crc32_option) + } +} + +impl<'a, W> ZipEntryWriter<'a, W> { + /// Returns the total number of bytes successfully written (bytes out). + pub fn compressed_bytes(&self) -> u64 { + self.compressed_bytes + } + + /// Returns the current offset in the output stream. + /// + /// See [`ZipArchiveWriter::stream_offset`] for more information. + pub fn stream_offset(&self) -> u64 { + self.inner.stream_offset() + } + + /// Finishes writing the file entry. + /// + /// This writes the data descriptor if necessary and adds the file entry to the central directory. + pub fn finish(self, mut output: DataDescriptorOutput) -> Result + where + W: Write, + { + output.compressed_size = self.compressed_bytes; + let mut buffer = [0u8; 24]; + buffer[0..4].copy_from_slice(&DataDescriptor::SIGNATURE.to_le_bytes()); + buffer[4..8].copy_from_slice(&output.crc.to_le_bytes()); + + let out_data = if output.compressed_size >= ZIP64_THRESHOLD_FILE_SIZE + || output.uncompressed_size >= ZIP64_THRESHOLD_FILE_SIZE + { + // Use 64-bit sizes for ZIP64 + buffer[8..16].copy_from_slice(&output.compressed_size.to_le_bytes()); + buffer[16..24].copy_from_slice(&output.uncompressed_size.to_le_bytes()); + &buffer[..] 
+ } else { + // Use 32-bit sizes for standard ZIP + buffer[8..12].copy_from_slice(&(output.compressed_size as u32).to_le_bytes()); + buffer[12..16].copy_from_slice(&(output.uncompressed_size as u32).to_le_bytes()); + &buffer[..16] + }; + + self.inner.writer.write_all(out_data)?; + + let mut file_header = FileHeader { + name_len: self.name_len, + compression_method: self.compression_method, + local_header_offset: self.local_header_offset, + compressed_size: output.compressed_size, + uncompressed_size: output.uncompressed_size, + crc: output.crc, + flags: self.flags, + modification_time: self.modification_time, + unix_permissions: self.unix_permissions, + extra_fields: self.extra_fields, + }; + file_header.finalize_extra_fields()?; + self.inner.files.push(file_header); + + Ok(self.compressed_bytes) + } +} + +impl Write for ZipEntryWriter<'_, W> +where + W: Write, +{ + fn write(&mut self, buf: &[u8]) -> io::Result { + let bytes_written = self.inner.writer.write(buf)?; + self.compressed_bytes += bytes_written as u64; + Ok(bytes_written) + } + + fn flush(&mut self) -> io::Result<()> { + self.inner.writer.flush() + } +} + +/// A writer for the uncompressed data of a Zip file entry. +/// +/// This writer will keep track of the data necessary to write the data +/// descriptor (ie: number of bytes written and the CRC32 checksum). +/// +/// Once all the data has been written, invoke the `finish` method to receive the +/// `DataDescriptorOutput` necessary to finalize the entry. +#[derive(Debug)] +pub struct ZipDataWriter { + inner: W, + uncompressed_bytes: u64, + crc: u32, + crc32_option: Crc32Option, +} + +impl ZipDataWriter { + /// Creates a new `ZipDataWriter` that writes to an underlying writer. 
+ #[deprecated( + since = "0.4.0", + note = "Use the tuple-based API: `ZipFileBuilder::start()` returns `(writer, builder)` which can propagate the CRC32 option" + )] + pub fn new(inner: W) -> Self { + Self::with_crc32_option(inner, Crc32Option::default()) + } + + /// Creates a new `ZipDataWriter` with the specified CRC32 option. + /// + /// This is an internal method. Use the tuple-based API via + /// `ZipFileBuilder::start()` instead. + pub(crate) fn with_crc32(inner: W, crc32_option: Crc32Option) -> Self { + Self::with_crc32_option(inner, crc32_option) + } + + /// Creates a new `ZipDataWriter` with a specific CRC32 calculation option. + fn with_crc32_option(inner: W, crc32_option: Crc32Option) -> Self { + let crc = crc32_option.initial_value(); + ZipDataWriter { + inner, + uncompressed_bytes: 0, + crc, + crc32_option, + } + } + + /// Gets a mutable reference to the underlying writer. + pub fn get_mut(&mut self) -> &mut W { + &mut self.inner + } + + /// Consumes self and returns the inner writer and the data descriptor to be + /// passed to a `ZipEntryWriter`. + /// + /// The writer is returned to facilitate situations where the underlying + /// compressor needs to be notified that no more data will be written so it + /// can write any sort of necessary epilogue (think zstd). + /// + /// The `DataDescriptorOutput` contains the CRC32 checksum and uncompressed size, + /// which is needed by `ZipEntryWriter::finish`. 
+ pub fn finish(mut self) -> Result<(W, DataDescriptorOutput), Error> + where + W: Write, + { + self.flush()?; + let output = DataDescriptorOutput { + crc: self.crc, + compressed_size: 0, + uncompressed_size: self.uncompressed_bytes, + }; + + Ok((self.inner, output)) + } +} + +impl Write for ZipDataWriter +where + W: Write, +{ + fn write(&mut self, buf: &[u8]) -> io::Result { + let bytes_written = self.inner.write(buf)?; + self.uncompressed_bytes += bytes_written as u64; + + // Only calculate CRC32 if the option is Calculate + if matches!(self.crc32_option, Crc32Option::Calculate) { + self.crc = crc::crc32_chunk(&buf[..bytes_written], self.crc); + } + + Ok(bytes_written) + } + + fn flush(&mut self) -> io::Result<()> { + self.inner.flush() + } +} + +/// Contains information written in the data descriptor after the file data. +#[derive(Debug, Clone)] +pub struct DataDescriptorOutput { + crc: u32, + compressed_size: u64, + uncompressed_size: u64, +} + +impl DataDescriptorOutput { + /// Returns the CRC32 checksum of the uncompressed data. + pub fn crc(&self) -> u32 { + self.crc + } + + /// Returns the uncompressed size of the data. 
+ pub fn uncompressed_size(&self) -> u64 { + self.uncompressed_size + } +} + +#[derive(Debug)] +struct FileHeader { + name_len: u16, + compression_method: CompressionMethod, + local_header_offset: u64, + compressed_size: u64, + uncompressed_size: u64, + crc: u32, + flags: u16, + modification_time: Option<UtcDateTime>, + unix_permissions: Option<u32>, + extra_fields: ExtraFieldsContainer, +} + +impl FileHeader { + fn needs_zip64(&self) -> bool { + self.compressed_size >= ZIP64_THRESHOLD_FILE_SIZE + || self.uncompressed_size >= ZIP64_THRESHOLD_FILE_SIZE + || self.local_header_offset >= ZIP64_THRESHOLD_OFFSET + } + + fn finalize_extra_fields(&mut self) -> Result<(), Error> { + if self.needs_zip64() { + let mut sink = [0u8; 24]; + let mut pos = 0; + if self.uncompressed_size >= ZIP64_THRESHOLD_FILE_SIZE { + sink[pos..pos + 8].copy_from_slice(&self.uncompressed_size.to_le_bytes()); + pos += 8; + } + if self.compressed_size >= ZIP64_THRESHOLD_FILE_SIZE { + sink[pos..pos + 8].copy_from_slice(&self.compressed_size.to_le_bytes()); + pos += 8; + } + if self.local_header_offset >= ZIP64_THRESHOLD_OFFSET { + sink[pos..pos + 8].copy_from_slice(&self.local_header_offset.to_le_bytes()); + pos += 8; + } + self.extra_fields + .add_field(ExtraFieldId::ZIP64, &sink[..pos], Header::CENTRAL)?; + } + + Ok(()) + } +} + +/// Writes the ZIP64 End of Central Directory Record. +/// +/// Every field is emitted little-endian in record order. Both disk numbers +/// are written as 0, and `total_entries` is used for both the per-disk and +/// the overall entry count. +fn write_zip64_eocd<W>( + writer: &mut W, + total_entries: u64, + central_directory_size: u64, + central_directory_offset: u64, +) -> Result<(), Error> +where + W: Write, +{ + // ZIP64 End of Central Directory Record signature + writer.write_all(&END_OF_CENTRAL_DIR_SIGNATURE64.to_le_bytes())?; + + // Size of ZIP64 end of central directory record (excluding signature and this field) + let record_size = (ZIP64_EOCD_SIZE - 12) as u64; + writer.write_all(&record_size.to_le_bytes())?; + + // Version made by + writer.write_all(&ZIP64_VERSION_NEEDED.to_le_bytes())?; + + // Version needed to extract +
writer.write_all(&ZIP64_VERSION_NEEDED.to_le_bytes())?; + + // Number of this disk + writer.write_all(&0u32.to_le_bytes())?; + + // Number of the disk with the start of the central directory + writer.write_all(&0u32.to_le_bytes())?; + + // Total number of entries in the central directory on this disk + writer.write_all(&total_entries.to_le_bytes())?; + + // Total number of entries in the central directory + writer.write_all(&total_entries.to_le_bytes())?; + + // Size of the central directory + writer.write_all(&central_directory_size.to_le_bytes())?; + + // Offset of start of central directory with respect to the starting disk number + writer.write_all(&central_directory_offset.to_le_bytes())?; + + Ok(()) +} + +/// Writes the ZIP64 End of Central Directory Locator +fn write_zip64_eocd_locator<W>(writer: &mut W, zip64_eocd_offset: u64) -> Result<(), Error> +where + W: Write, +{ + // ZIP64 End of Central Directory Locator signature + writer.write_all(&END_OF_CENTRAL_DIR_LOCATOR_SIGNATURE.to_le_bytes())?; + + // Number of the disk with the start of the ZIP64 end of central directory + writer.write_all(&0u32.to_le_bytes())?; + + // Relative offset of the ZIP64 end of central directory record + writer.write_all(&zip64_eocd_offset.to_le_bytes())?; + + // Total number of disks + writer.write_all(&1u32.to_le_bytes())?; + + Ok(()) +} + +#[derive(Debug, Clone)] +struct ZipEntryOptions { + compression_method: CompressionMethod, + modification_time: Option<UtcDateTime>, + unix_permissions: Option<u32>, + extra_fields: ExtraFieldsContainer, +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ZipArchive; + use std::io::Cursor; + + #[test] + fn test_name_lifetime_independence() { + let mut output = Cursor::new(Vec::new()); + let mut archive = ZipArchiveWriter::new(&mut output); + + // Test file builder with temporary name + { + let (mut entry, config) = { + let temp_name = format!("temp-{}.txt", 42); + archive.new_file(&temp_name).start().unwrap() + }; + let mut writer = config.wrap(&mut entry); +
writer.write_all(b"test").unwrap(); + let (_, desc) = writer.finish().unwrap(); + entry.finish(desc).unwrap(); + } + + archive.finish().unwrap(); + } + + #[test] + fn test_builder_with_offset_and_capacity() { + let mut output = Cursor::new(Vec::new()); + + output.write_all(b"PREFIX DATA").unwrap(); + let offset = output.position(); + + let mut archive = ZipArchiveWriterBuilder::new() + .with_capacity(5) + .with_offset(offset) + .build(&mut output); + + let (mut entry, config) = archive.new_file("test.txt").start().unwrap(); + let mut writer = config.wrap(&mut entry); + writer.write_all(b"Hello World").unwrap(); + let (_, desc) = writer.finish().unwrap(); + entry.finish(desc).unwrap(); + + archive.finish().unwrap(); + } + + #[test] + fn test_stream_offset_methods() { + let mut output = Cursor::new(Vec::new()); + let mut archive = ZipArchiveWriter::new(&mut output); + + // Test case 1: Get local header offset + let local_header_offset = archive.stream_offset(); + let (mut file, config) = archive.new_file("test.txt").start().unwrap(); + + // Test case 2: Get start of data offset + let data_start_offset = file.stream_offset(); + + // Write some data + let mut writer = config.wrap(&mut file); + writer.write_all(b"Hello World").unwrap(); + let (_, desc) = writer.finish().unwrap(); + + // Test case 3: Get end of compressed data offset + let end_data_offset = file.stream_offset(); + + let compressed_bytes = file.finish(desc).unwrap(); + + // Test case 4: Get end of data descriptor offset (next file's local header offset) + let end_descriptor_offset = archive.stream_offset(); + + archive.finish().unwrap(); + + // Verify the offsets make sense + assert_eq!(local_header_offset, 0); + assert!(data_start_offset > local_header_offset); + assert_eq!( + end_data_offset, + data_start_offset + b"Hello World".len() as u64 + ); + assert_eq!(end_descriptor_offset, end_data_offset + 16); // 16 bytes for data descriptor + assert_eq!(compressed_bytes, end_data_offset - data_start_offset); 
+ } + + #[test] + fn test_crc32_options() { + use std::io::Write; + + let data = b"Hello, world!"; + let correct_crc = crate::crc32(data); + let incorrect_crc = 0x12345678u32; + + // Test with default CRC calculation + { + let mut output = Cursor::new(Vec::new()); + let mut archive = ZipArchiveWriter::new(&mut output); + let (mut entry, config) = archive.new_file("normal.txt").start().unwrap(); + let mut writer = config.wrap(&mut entry); + writer.write_all(data).unwrap(); + let (_, descriptor) = writer.finish().unwrap(); + entry.finish(descriptor).unwrap(); + archive.finish().unwrap(); + } + + // Test with correct custom CRC - should succeed + { + let mut output = Cursor::new(Vec::new()); + let mut archive = ZipArchiveWriter::new(&mut output); + let (mut entry, config) = archive + .new_file("correct.txt") + .crc32(Crc32Option::Custom(correct_crc)) + .start() + .unwrap(); + let mut writer = config.wrap(&mut entry); + writer.write_all(data).unwrap(); + let (_, descriptor) = writer.finish().unwrap(); + entry.finish(descriptor).unwrap(); + archive.finish().unwrap(); + + // Verify the archive can be read + let output = output.into_inner(); + let archive = ZipArchive::from_slice(&output).unwrap(); + let mut entries = archive.entries(); + let entry = entries.next_entry().unwrap().unwrap(); + let wayfinder = entry.wayfinder(); + let entry = archive.get_entry(wayfinder).unwrap(); + let mut verifier = entry.verifying_reader(entry.data()); + let mut actual = Vec::new(); + std::io::copy(&mut verifier, &mut actual).unwrap(); + assert_eq!(&actual, data); + } + + // Test with incorrect custom CRC - verification should fail + { + let mut output = Cursor::new(Vec::new()); + let mut archive = ZipArchiveWriter::new(&mut output); + let (mut entry, config) = archive + .new_file("incorrect.txt") + .crc32(Crc32Option::Custom(incorrect_crc)) + .start() + .unwrap(); + let mut writer = config.wrap(&mut entry); + writer.write_all(data).unwrap(); + let (_, descriptor) = 
writer.finish().unwrap(); + entry.finish(descriptor).unwrap(); + archive.finish().unwrap(); + + // Verify the archive fails verification + let output = output.into_inner(); + let archive = ZipArchive::from_slice(&output).unwrap(); + let mut entries = archive.entries(); + let entry = entries.next_entry().unwrap().unwrap(); + let wayfinder = entry.wayfinder(); + let entry = archive.get_entry(wayfinder).unwrap(); + let mut verifier = entry.verifying_reader(entry.data()); + let mut actual = Vec::new(); + let result = std::io::copy(&mut verifier, &mut actual); + + // Verification should fail with InvalidChecksum error + assert!(result.is_err()); + let err = result.unwrap_err(); + assert_eq!(err.kind(), std::io::ErrorKind::InvalidData); + let source = err.into_inner().unwrap(); + let zip_error = source.downcast::().unwrap(); + match zip_error.kind() { + ErrorKind::InvalidChecksum { expected, actual } => { + assert_eq!(*expected, incorrect_crc); + assert_eq!(*actual, correct_crc); + }, + _ => panic!("Expected InvalidChecksum error, got {:?}", zip_error.kind()), + } + } + + // Test with skipped CRC - should have CRC of 0, and should validate fine + { + let mut output = Cursor::new(Vec::new()); + let mut archive = ZipArchiveWriter::new(&mut output); + let (mut entry, config) = archive + .new_file("skipped.txt") + .crc32(Crc32Option::Skip) + .start() + .unwrap(); + let mut writer = config.wrap(&mut entry); + writer.write_all(data).unwrap(); + let (_, descriptor) = writer.finish().unwrap(); + entry.finish(descriptor).unwrap(); + archive.finish().unwrap(); + + // Verify the archive can be read + let output = output.into_inner(); + let archive = ZipArchive::from_slice(&output).unwrap(); + let mut entries = archive.entries(); + let entry = entries.next_entry().unwrap().unwrap(); + let wayfinder = entry.wayfinder(); + let entry = archive.get_entry(wayfinder).unwrap(); + let mut verifier = entry.verifying_reader(entry.data()); + let mut actual = Vec::new(); + std::io::copy(&mut 
verifier, &mut actual).unwrap(); + assert_eq!(&actual, data); + } + } + + #[test] + fn test_tuple_api() { + use std::io::Write; + + let data = b"Hello, world!"; + let custom_crc = 0x12345678u32; + + // Test the new tuple-based API with custom CRC + let mut output = Cursor::new(Vec::new()); + let mut archive = ZipArchiveWriter::new(&mut output); + let (mut entry, config) = archive + .new_file("test.txt") + .crc32(Crc32Option::Custom(custom_crc)) + .start() + .unwrap(); + + // Using the new unified API - the CRC option is automatically configured + let mut writer = config.wrap(&mut entry); + writer.write_all(data).unwrap(); + let (_, descriptor) = writer.finish().unwrap(); + + // Verify the CRC was correctly applied + assert_eq!(descriptor.crc, custom_crc); + + entry.finish(descriptor).unwrap(); + archive.finish().unwrap(); + } + + #[test] + #[allow(deprecated)] + fn test_deprecated_create_method() { + use std::io::Write; + + let data = b"Hello, deprecated API!"; + + // Test that deprecated create() method still works + let mut output = Cursor::new(Vec::new()); + let mut archive = ZipArchiveWriter::new(&mut output); + let mut entry = archive.new_file("deprecated.txt").create().unwrap(); + let mut writer = ZipDataWriter::new(&mut entry); + writer.write_all(data).unwrap(); + let (_, descriptor) = writer.finish().unwrap(); + entry.finish(descriptor).unwrap(); + archive.finish().unwrap(); + + // Verify the archive can be read + let output = output.into_inner(); + let archive = ZipArchive::from_slice(&output).unwrap(); + let mut entries = archive.entries(); + let entry = entries.next_entry().unwrap().unwrap(); + let wayfinder = entry.wayfinder(); + let entry = archive.get_entry(wayfinder).unwrap(); + let mut verifier = entry.verifying_reader(entry.data()); + let mut actual = Vec::new(); + std::io::copy(&mut verifier, &mut actual).unwrap(); + assert_eq!(&actual, data); + } +} diff --git a/crates/xml-minifier/Cargo.toml b/crates/xml-minifier/Cargo.toml new file mode 100644 
index 0000000..391a012 --- /dev/null +++ b/crates/xml-minifier/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "xml-minifier" +version = "0.1.0" +edition = "2024" + +[lib] +proc-macro = true + +[dependencies] +quick-xml = "0.38" +quote = "1.0" diff --git a/crates/xml-minifier/README.md b/crates/xml-minifier/README.md new file mode 100644 index 0000000..71ff918 --- /dev/null +++ b/crates/xml-minifier/README.md @@ -0,0 +1,147 @@ +# XML Minifier + +A high-performance Rust procedural macro for compile-time XML minification. + +## Features + +- **Compile-time minification**: XML files are minified during compilation, zero runtime overhead +- **Aggressive optimization**: + - Removes comments and processing instructions + - Trims and collapses whitespace in text nodes + - Collapses empty tags (`` → ``) + - Removes unnecessary whitespace between tags +- **Safe and standards-compliant**: Preserves XML structure and semantics +- **Fast**: Single-pass processing with efficient buffer reuse +- **Memory-efficient**: Pre-allocates buffers and uses zero-copy operations where possible + +## Usage + +Add to your `Cargo.toml`: + +```toml +[dependencies] +xml-minifier = { path = "../xml-minifier" } +``` + +Use the `minified_xml!` macro: + +```rust +use xml_minifier::minified_xml; + +// Minify an XML file at compile time +// Path is relative to the source file calling the macro +const TEMPLATE: &str = minified_xml!("template.xml"); + +fn main() { + println!("{}", TEMPLATE); +} +``` + +### Path Resolution + +**File paths are resolved relative to the source file** that invokes the macro. This makes it intuitive to keep XML files next to your Rust source code. 
+ +#### Example Project Structure + +``` +my-project/ +├── Cargo.toml +└── src/ + ├── main.rs + ├── lib.rs + └── templates/ + ├── mod.rs + └── document.xml +``` + +In `src/templates/mod.rs`: +```rust +// XML file is in the same directory as this source file +const TEMPLATE: &str = minified_xml!("document.xml"); +``` + +In `src/lib.rs`: +```rust +// XML file is in the templates subdirectory +const TEMPLATE: &str = minified_xml!("templates/document.xml"); +``` + +## Example + +Given an XML file `template.xml`: + +```xml + + + + + Text content + + + +``` + +The macro produces: + +```xml +Text content +``` + +## Implementation Details + +### Whitespace Handling + +The minifier intelligently handles whitespace: +- Removes pure whitespace between tags +- Trims leading and trailing whitespace from text nodes +- Preserves text content + +### CDATA Sections + +CDATA sections are preserved as-is since they may contain formatting-sensitive content: + +```xml + with special chars]]> +``` + +### XML Declarations + +XML declarations are preserved with their attributes: + +```xml + +``` + +### DOCTYPE Declarations + +DOCTYPE declarations are preserved: + +```xml + +``` + +## Performance + +- **Zero runtime cost**: Minification happens at compile time +- **Efficient processing**: Single-pass with buffer reuse +- **Memory-efficient**: Pre-allocates approximately half the input size +- **Zero-copy where possible**: Uses `Cow<[u8]>` and byte slices + +# Tips for Rust Analyzer users + +Note that the procedure macro utilizes the `local_file()` function to access the source code file, +and rust-analyzer would not correctly handle the expansion due to its limitation. +In order not to produce tons of errors and warnings, add the following settings to your VS Code settings: + +```json +{ + "rust-analyzer.procMacro.ignored": { + "xml-minifier": ["minified_xml"] + } +} +``` + +## License + +This is part of the Litchi project so it is licensed under the same license that the project adopts. 
+ diff --git a/crates/xml-minifier/src/lib.rs b/crates/xml-minifier/src/lib.rs new file mode 100644 index 0000000..4d31aeb --- /dev/null +++ b/crates/xml-minifier/src/lib.rs @@ -0,0 +1,1087 @@ +use proc_macro::{TokenStream, TokenTree}; +use quick_xml::Reader; +use quick_xml::events::{BytesStart, Event}; +use quote::quote; +use std::fs; +use std::path::Path; + +/// Minifies an XML string literal at compile time +/// +/// This macro performs aggressive XML minification including: +/// - Removing comments and processing instructions +/// - Trimming and collapsing whitespace in text nodes +/// - Collapsing empty tags (`` → ``) +/// - Removing unnecessary whitespace between tags +/// +/// Unlike [`minified_xml!`], this macro takes an XML string literal directly +/// instead of reading from a file. +/// +/// # Examples +/// +/// ```ignore +/// const TEMPLATE: &str = minified_xml_str!(r#" +/// +/// +/// +/// +/// Some text content +/// +/// +/// +/// "#); +/// // Result: Some text content +/// ``` +#[proc_macro] +pub fn minified_xml_str(input: TokenStream) -> TokenStream { + let xml_content = input_to_string(input); + + // Minify the XML + let minified = minify_xml(&xml_content) + .unwrap_or_else(|e| panic!("Failed to minify XML string literal: {}", e)); + + let expanded = quote! { + #minified + }; + + // Generate the output token stream + TokenStream::from(expanded) +} + +/// Minifies an XML file at compile time and embeds it as a string literal +/// +/// This macro performs aggressive XML minification including: +/// - Removing comments and processing instructions +/// - Trimming and collapsing whitespace in text nodes +/// - Collapsing empty tags (`` → ``) +/// - Removing unnecessary whitespace between tags +/// +/// # Path Resolution +/// +/// File paths are resolved **relative to the source file** that invokes the macro. +/// This allows for intuitive usage where XML files can be placed next to the source code. 
+/// +/// For minifying XML string literals directly, see [`minified_xml_str!`]. +/// +/// # Examples +/// +/// ```ignore +/// // If you have this structure: +/// // src/ +/// // templates/ +/// // mod.rs +/// // document.xml +/// // +/// // In templates/mod.rs: +/// const TEMPLATE: &str = minified_xml!("document.xml"); +/// +/// // Or in parent directory: +/// const TEMPLATE: &str = minified_xml!("templates/document.xml"); +/// ``` +#[proc_macro] +pub fn minified_xml(input: TokenStream) -> TokenStream { + let file_path = input_to_string(input); + + // Get the source file location where the macro was called + let call_site = proc_macro::Span::call_site(); + let source_file = call_site.local_file().expect("Failed to get local file"); + let target_path = source_file + .parent() + .expect("Failed to get parent directory of calling file") + .join(Path::new(&file_path)); + + // Canonicalize to get absolute path (helps with error messages and change detection) + let canonical_path = target_path + .canonicalize() + .unwrap_or_else(|e| panic!("Failed to canonicalize file path '{}': {}", file_path, e)); + + // Read the XML file + let xml_content = fs::read_to_string(&canonical_path).expect("Failed to read XML file"); + + // Minify the XML + let minified = minify_xml(&xml_content) + .unwrap_or_else(|e| panic!("Failed to minify XML from '{}': {}", file_path, e)); + + let expanded = quote! { + #minified + }; + + // Generate the output token stream + TokenStream::from(expanded) +} + +/// Minifies an XML template and formats it with arguments at runtime, with compile-time optimizations +/// +/// This macro combines XML minification with optimized string formatting: +/// - Minifies the XML template at compile time +/// - Pre-calculates sizes of static parts +/// - Pre-allocates exact memory needed +/// - Avoids format! macro overhead through direct string building +/// +/// The syntax is similar to `format!`, but the template is minified first. 
+/// +/// # Formatting Syntax +/// +/// - `{}` - Positional argument (uses `Display` trait) +/// - `{0}`, `{1}`, ... - Indexed positional argument +/// - `{name}` - Named argument +/// +/// # Examples +/// +/// ```ignore +/// // Basic usage with positional arguments +/// let name = "document"; +/// let version = "1.0"; +/// let xml = minified_xml_format!(r#" +/// +/// +/// +/// {} +/// +/// "#, version, name); +/// // Result: document +/// +/// // With named arguments +/// let xml = minified_xml_format!( +/// r#"{name}{age}"#, +/// name = "Alice", +/// age = 30 +/// ); +/// // Result: Alice30 +/// ``` +#[proc_macro] +pub fn minified_xml_format(input: TokenStream) -> TokenStream { + // Parse the input tokens + let tokens: Vec = input.into_iter().collect(); + + if tokens.is_empty() { + panic!("minified_xml_format! requires at least a format string"); + } + + // Extract the format string (first argument) + let format_str_literal = &tokens[0]; + let TokenTree::Literal(lit) = format_str_literal else { + panic!("First argument must be a string literal"); + }; + + let template = literal_to_string(lit.to_string()); + + // Replace format placeholders with temporary markers before minification + // This prevents the XML parser from being confused by {} characters + let (template_with_markers, placeholder_map) = replace_placeholders_with_markers(&template); + + // Minify the XML template + let minified = minify_xml(&template_with_markers) + .unwrap_or_else(|e| panic!("Failed to minify XML template: {}", e)); + + // Restore the placeholders + let minified_with_placeholders = restore_placeholders_from_markers(&minified, &placeholder_map); + + // Parse the remaining arguments + let args = if tokens.len() > 1 { + // Skip the first token (format string) and the comma + let mut arg_tokens = Vec::new(); + let mut i = 1; + + // Skip comma after format string + if let Some(TokenTree::Punct(p)) = tokens.get(i) + && p.as_char() == ',' + { + i += 1; + } + + while i < tokens.len() { + 
arg_tokens.push(tokens[i].clone()); + i += 1; + } + + TokenStream::from_iter(arg_tokens) + } else { + TokenStream::new() + }; + + // Parse the minified template to find format placeholders and static parts + let parts = parse_format_string(&minified_with_placeholders); + + // Generate optimized code + generate_format_code(&parts, args) +} + +/// Replace format placeholders with unique markers that won't confuse the XML parser +/// Returns the modified string and a map of marker -> placeholder +fn replace_placeholders_with_markers(template: &str) -> (String, Vec) { + let mut result = String::with_capacity(template.len()); + let mut placeholders = Vec::new(); + let mut chars = template.chars().peekable(); + + while let Some(ch) = chars.next() { + if ch == '{' { + // Check for escaped brace {{ + if chars.peek() == Some(&'{') { + chars.next(); + result.push_str("{{"); + continue; + } + + // Parse the placeholder content + let mut placeholder_content = String::new(); + placeholder_content.push('{'); + + loop { + match chars.next() { + Some('}') => { + placeholder_content.push('}'); + break; + }, + Some(ch) => placeholder_content.push(ch), + None => { + // Unclosed placeholder, just add what we have + result.push_str(&placeholder_content); + return (result, placeholders); + }, + } + } + + // Create a unique marker + let marker = format!("__PLACEHOLDER_{}__", placeholders.len()); + placeholders.push(placeholder_content); + result.push_str(&marker); + } else if ch == '}' { + // Check for escaped brace }} + if chars.peek() == Some(&'}') { + chars.next(); + result.push_str("}}"); + } else { + result.push('}'); + } + } else { + result.push(ch); + } + } + + (result, placeholders) +} + +/// Restore the original placeholders from markers +fn restore_placeholders_from_markers(minified: &str, placeholders: &[String]) -> String { + let mut result = minified.to_string(); + + // Replace markers back with original placeholders + for (idx, placeholder) in 
placeholders.iter().enumerate() { + let marker = format!("__PLACEHOLDER_{}__", idx); + result = result.replace(&marker, placeholder); + } + + result +} + +/// Represents a part of a format string +#[derive(Debug, Clone)] +enum FormatPart { + /// Static text that doesn't need formatting + Static(String), + /// A format placeholder (either positional index or named argument) + Placeholder(PlaceholderType), +} + +/// Type of format placeholder +#[derive(Debug, Clone)] +enum PlaceholderType { + /// Positional argument by index (e.g., {0}, {1}) + Positional(usize), + /// Named argument (e.g., {name}) + Named(String), + /// Next positional argument (e.g., {}) + NextPositional, +} + +/// Parse a format string into static parts and placeholders +fn parse_format_string(template: &str) -> Vec { + let mut parts = Vec::new(); + let mut current_static = String::new(); + let mut chars = template.chars().peekable(); + + while let Some(ch) = chars.next() { + if ch == '{' { + // Check for escaped brace {{ + if chars.peek() == Some(&'{') { + chars.next(); + current_static.push('{'); + continue; + } + + // Save any accumulated static text + if !current_static.is_empty() { + parts.push(FormatPart::Static(current_static.clone())); + current_static.clear(); + } + + // Parse the placeholder content + let mut placeholder_content = String::new(); + loop { + match chars.next() { + Some('}') => break, + Some(ch) => placeholder_content.push(ch), + None => panic!("Unclosed format placeholder in template"), + } + } + + // Determine placeholder type + let placeholder = if placeholder_content.is_empty() { + PlaceholderType::NextPositional + } else if placeholder_content.chars().all(|c| c.is_ascii_digit()) { + PlaceholderType::Positional( + placeholder_content + .parse() + .expect("Invalid positional index"), + ) + } else { + PlaceholderType::Named(placeholder_content) + }; + + parts.push(FormatPart::Placeholder(placeholder)); + } else if ch == '}' { + // Check for escaped brace }} + if 
chars.peek() == Some(&'}') { + chars.next(); + current_static.push('}'); + } else { + panic!("Unmatched }} in format string"); + } + } else { + current_static.push(ch); + } + } + + // Add any remaining static text + if !current_static.is_empty() { + parts.push(FormatPart::Static(current_static)); + } + + parts +} + +/// Generate optimized formatting code +fn generate_format_code(parts: &[FormatPart], args: TokenStream) -> TokenStream { + use proc_macro::TokenTree as TT; + + // Parse arguments into positional and named + let mut positional_args = Vec::new(); + let mut named_args = std::collections::HashMap::new(); + + let arg_tokens: Vec = args.into_iter().collect(); + let mut i = 0; + + while i < arg_tokens.len() { + // Check if this is a named argument (ident = value) + if let Some(TT::Ident(name)) = arg_tokens.get(i) + && let Some(TT::Punct(punct)) = arg_tokens.get(i + 1) + && punct.as_char() == '=' + { + // Named argument + let name_str = name.to_string(); + let mut value_tokens = Vec::new(); + i += 2; // Skip name and = + + // Collect value tokens until comma or end + while i < arg_tokens.len() { + if let TT::Punct(p) = &arg_tokens[i] + && p.as_char() == ',' + { + i += 1; + break; + } + value_tokens.push(arg_tokens[i].clone()); + i += 1; + } + + named_args.insert(name_str, value_tokens); + continue; + } + + // Positional argument + let mut value_tokens = Vec::new(); + while i < arg_tokens.len() { + if let TT::Punct(p) = &arg_tokens[i] + && p.as_char() == ',' + { + i += 1; + break; + } + value_tokens.push(arg_tokens[i].clone()); + i += 1; + } + + if !value_tokens.is_empty() { + positional_args.push(value_tokens); + } + } + + // Calculate static size + let static_size: usize = parts + .iter() + .filter_map(|p| match p { + FormatPart::Static(s) => Some(s.len()), + _ => None, + }) + .sum(); + + // Generate code to build the string - build it as a string to avoid ToTokens issues + let mut code = format!( + "{{ let mut __result = 
::std::string::String::with_capacity({} + 32);", + static_size + ); + + let mut next_positional_idx = 0; + + for part in parts { + match part { + FormatPart::Static(text) => { + code.push_str(&format!("__result.push_str({:?});", text)); + }, + FormatPart::Placeholder(placeholder) => { + let arg_tokens = match placeholder { + PlaceholderType::NextPositional => { + if let Some(arg) = positional_args.get(next_positional_idx) { + next_positional_idx += 1; + arg + } else { + panic!("Not enough positional arguments"); + } + }, + PlaceholderType::Positional(idx) => { + if let Some(arg) = positional_args.get(*idx) { + arg + } else { + panic!("Positional argument {} not found", idx); + } + }, + PlaceholderType::Named(name) => { + if let Some(arg) = named_args.get(name) { + arg + } else { + panic!("Named argument '{}' not found", name); + } + }, + }; + + // Convert the token trees to a string representation + let arg_str: String = arg_tokens + .iter() + .map(|tt| tt.to_string()) + .collect::>() + .join(""); + + code.push_str(&format!( + "{{ use ::std::fmt::Write; let _ = write!(&mut __result, \"{{}}\", {}); }}", + arg_str + )); + }, + } + } + + code.push_str("__result }"); + + // Parse the generated code string back into a TokenStream + code.parse().expect("Failed to parse generated code") +} + +/// Handles the conversion between String Literals represented by TokenStream and Rust String type +/// +/// Thanks to https://github.com/scpso/const-css-minify, the code snippet below is nearly a Copy & Paste +fn input_to_string(input: TokenStream) -> String { + let token_trees: Vec<_> = input.into_iter().collect(); + if token_trees.len() != 1 { + panic!("Expected exactly one token tree, got {}", token_trees.len()); + } + let TokenTree::Literal(literal) = token_trees.first().unwrap() else { + panic!("Expected a string literal"); + }; + literal_to_string(literal.to_string()) +} + +/// Convert a literal token string to its actual string value +fn literal_to_string(mut literal: String) 
-> String { + // Unescape the raw string literal + if let Some(c) = literal.get(0..=0) + && c != "r" + { + literal = literal + .replace("\\\"", "\"") + .replace("\\n", "\n") + .replace("\\r", "\r") + .replace("\\t", "\t") + .replace("\\\\", "\\") + } + + // trim leading and trailing ".." or r#".."# from string literal + let start = &literal.find('\"').unwrap() + 1; + let end = &literal.rfind('\"').unwrap() - 1; + if start > end { + panic!("Invalid string literal"); + } + literal[start..=end].to_string() +} + +/// Minifies XML content by removing unnecessary whitespace, comments, and collapsing empty tags +/// +/// This implementation follows best practices for XML minification: +/// - Preserves XML declarations +/// - Removes comments and processing instructions +/// - Intelligently trims whitespace between elements +/// - Collapses empty element tags +/// - Handles CDATA sections properly +/// +/// # Performance +/// - Zero-copy where possible using `Cow<[u8]>` +/// - Single-pass processing +/// - Efficient buffer reuse +fn minify_xml(xml: &str) -> Result> { + let mut reader = Reader::from_str(xml); + reader.config_mut().trim_text(false); // We handle trimming ourselves for better control + + let mut output = Vec::with_capacity(xml.len() / 2); // Pre-allocate roughly half the size + let mut buf = Vec::new(); + + // Stack to track element names for collapsing empty tags + let mut tag_stack: Vec> = Vec::new(); + + loop { + match reader.read_event_into(&mut buf)? { + Event::Eof => { + // Flush any remaining buffered start tags before EOF + // (this can happen if the root element never closes in the stream) + for start_tag in tag_stack.drain(..) 
{ + output.push(b'<'); + output.extend_from_slice(start_tag.name().as_ref()); + write_attributes(&mut output, &start_tag)?; + output.push(b'>'); + } + break; + }, + + // Preserve XML declaration - write it as-is + Event::Decl(e) => { + output.extend_from_slice(b""); + }, + + // Skip comments - they're not needed in minified output + Event::Comment(_) => continue, + + // Skip processing instructions (except xml declaration handled above) + Event::PI(_) => continue, + + // Handle DOCTYPE declarations - preserve them + Event::DocType(e) => { + output.extend_from_slice(b"'); + }, + + // Handle start tags - buffer them to check if they can be collapsed + Event::Start(e) => { + // Clone the tag for our stack (we need owned data) + let owned = e.to_owned(); + tag_stack.push(owned); + }, + + // Handle empty tags - flush buffered tags first, then write + Event::Empty(e) => { + // Flush all buffered start tags since we have an empty element + let tags_to_flush = std::mem::take(&mut tag_stack); + for start_tag in tags_to_flush { + output.push(b'<'); + output.extend_from_slice(start_tag.name().as_ref()); + write_attributes(&mut output, &start_tag)?; + output.push(b'>'); + } + + // Now write the empty tag + output.push(b'<'); + output.extend_from_slice(e.name().as_ref()); + write_attributes(&mut output, &e)?; + output.extend_from_slice(b"/>"); + }, + + // Handle end tags - check if we can collapse with start tag + Event::End(e) => { + if let Some(start_tag) = tag_stack.pop() { + // Check if this end tag matches the last start tag + // If so, we can collapse to an empty tag + if start_tag.name() == e.name() { + // Before writing the collapsed tag, flush all other buffered start tags + // This ensures proper nesting: not + let remaining_tags = std::mem::take(&mut tag_stack); + for buffered_tag in remaining_tags { + output.push(b'<'); + output.extend_from_slice(buffered_tag.name().as_ref()); + write_attributes(&mut output, &buffered_tag)?; + output.push(b'>'); + } + + // Now write 
the collapsed tag + output.push(b'<'); + output.extend_from_slice(start_tag.name().as_ref()); + write_attributes(&mut output, &start_tag)?; + output.extend_from_slice(b"/>"); + } else { + // Tags don't match - we have content in between + // Flush all buffered tags + let mut all_tags = std::mem::take(&mut tag_stack); + all_tags.push(start_tag); + + for buffered_tag in all_tags { + output.push(b'<'); + output.extend_from_slice(buffered_tag.name().as_ref()); + write_attributes(&mut output, &buffered_tag)?; + output.push(b'>'); + } + + // Write the end tag + output.push(b'<'); + output.push(b'/'); + output.extend_from_slice(e.name().as_ref()); + output.push(b'>'); + } + } else { + // No matching start tag in our buffer - just write end tag + output.push(b'<'); + output.push(b'/'); + output.extend_from_slice(e.name().as_ref()); + output.push(b'>'); + } + }, + + // Handle text content - trim whitespace intelligently + Event::Text(e) => { + // Get the text content + let text = e.as_ref(); + + // Intelligently handle whitespace + // Skip pure whitespace between tags, otherwise trim both leading and trailing whitespace + // This is safe for most XML use cases where whitespace between elements is not significant + let trimmed = if is_whitespace_only(text) { + &[] + } else { + trim_whitespace(text) + }; + + // Only flush buffered start tags if we have non-whitespace text content + if !trimmed.is_empty() { + // Flush ALL buffered start tags since we have text content + // Use mem::take to efficiently move all elements out of the stack + let tags_to_flush = std::mem::take(&mut tag_stack); + for start_tag in tags_to_flush { + output.push(b'<'); + output.extend_from_slice(start_tag.name().as_ref()); + write_attributes(&mut output, &start_tag)?; + output.push(b'>'); + } + + output.extend_from_slice(trimmed); + } + }, + + // Preserve CDATA sections as-is (they may contain formatting-sensitive content) + Event::CData(e) => { + // Flush ALL buffered start tags in correct order + let 
tags_to_flush = std::mem::take(&mut tag_stack); + for start_tag in tags_to_flush { + output.push(b'<'); + output.extend_from_slice(start_tag.name().as_ref()); + write_attributes(&mut output, &start_tag)?; + output.push(b'>'); + } + + output.extend_from_slice(b""); + }, + + // Skip entity references - they'll be handled by the parser + // This case is for general entity references which are rare in modern XML + Event::GeneralRef(_) => continue, + } + + buf.clear(); + } + + // Flush any remaining buffered tags (shouldn't happen with valid XML) + for start_tag in tag_stack { + output.push(b'<'); + output.extend_from_slice(start_tag.name().as_ref()); + write_attributes(&mut output, &start_tag)?; + output.push(b'>'); + } + + let result = String::from_utf8(output)?; + Ok(result) +} + +/// Helper function to write attributes efficiently +#[inline] +fn write_attributes(output: &mut Vec, tag: &BytesStart) -> Result<(), quick_xml::Error> { + for attr in tag.attributes() { + let attr = attr?; + output.push(b' '); + output.extend_from_slice(attr.key.as_ref()); + output.extend_from_slice(b"=\""); + output.extend_from_slice(&attr.value); + output.push(b'"'); + } + Ok(()) +} + +/// Check if a byte slice contains only whitespace characters +#[inline] +fn is_whitespace_only(bytes: &[u8]) -> bool { + bytes + .iter() + .all(|&b| matches!(b, b' ' | b'\t' | b'\n' | b'\r')) +} + +/// Trim leading and trailing whitespace from byte slice +#[inline] +fn trim_whitespace(bytes: &[u8]) -> &[u8] { + let start = bytes + .iter() + .position(|&b| !matches!(b, b' ' | b'\t' | b'\n' | b'\r')) + .unwrap_or(bytes.len()); + + let end = bytes + .iter() + .rposition(|&b| !matches!(b, b' ' | b'\t' | b'\n' | b'\r')) + .map(|pos| pos + 1) + .unwrap_or(0); + + if start <= end { + &bytes[start..end] + } else { + &[] + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_minify_xml_basic() { + let input = r#" + + + + Text content + + + + "#; + + let minified = minify_xml(input).unwrap(); + + 
// Should remove extra whitespace and comments + assert!(!minified.contains(" + + Some text content + + + + + here]]> + + + + "#; + let minified = minify_xml(input).unwrap(); + + // Verify comment removal + assert!(!minified.contains(" - - Text content - - - -``` - -The macro produces: - -```xml -Text content -``` - -## Implementation Details - -### Whitespace Handling - -The minifier intelligently handles whitespace: -- Removes pure whitespace between tags -- Trims leading and trailing whitespace from text nodes -- Preserves text content - -### CDATA Sections - -CDATA sections are preserved as-is since they may contain formatting-sensitive content: - -```xml - with special chars]]> -``` - -### XML Declarations - -XML declarations are preserved with their attributes: - -```xml - -``` - -### DOCTYPE Declarations - -DOCTYPE declarations are preserved: - -```xml - -``` - -## Performance - -- **Zero runtime cost**: Minification happens at compile time -- **Efficient processing**: Single-pass with buffer reuse -- **Memory-efficient**: Pre-allocates approximately half the input size -- **Zero-copy where possible**: Uses `Cow<[u8]>` and byte slices - -# Tips for Rust Analyzer users - -Note that the procedure macro utilizes the `local_file()` function to access the source code file, -and rust-analyzer would not correctly handle the expansion due to its limitation. -In order not to produce tons of errors and warnings, add the following settings to your VS Code settings: - -```json -{ - "rust-analyzer.procMacro.ignored": { - "xml-minifier": ["minified_xml"] - } -} -``` - -## License - -This is part of the Litchi project so it is licensed under the same license that the project adopts. 
- diff --git a/xml-minifier/src/lib.rs b/xml-minifier/src/lib.rs deleted file mode 100644 index 4d31aeb..0000000 --- a/xml-minifier/src/lib.rs +++ /dev/null @@ -1,1087 +0,0 @@ -use proc_macro::{TokenStream, TokenTree}; -use quick_xml::Reader; -use quick_xml::events::{BytesStart, Event}; -use quote::quote; -use std::fs; -use std::path::Path; - -/// Minifies an XML string literal at compile time -/// -/// This macro performs aggressive XML minification including: -/// - Removing comments and processing instructions -/// - Trimming and collapsing whitespace in text nodes -/// - Collapsing empty tags (`` → ``) -/// - Removing unnecessary whitespace between tags -/// -/// Unlike [`minified_xml!`], this macro takes an XML string literal directly -/// instead of reading from a file. -/// -/// # Examples -/// -/// ```ignore -/// const TEMPLATE: &str = minified_xml_str!(r#" -/// -/// -/// -/// -/// Some text content -/// -/// -/// -/// "#); -/// // Result: Some text content -/// ``` -#[proc_macro] -pub fn minified_xml_str(input: TokenStream) -> TokenStream { - let xml_content = input_to_string(input); - - // Minify the XML - let minified = minify_xml(&xml_content) - .unwrap_or_else(|e| panic!("Failed to minify XML string literal: {}", e)); - - let expanded = quote! { - #minified - }; - - // Generate the output token stream - TokenStream::from(expanded) -} - -/// Minifies an XML file at compile time and embeds it as a string literal -/// -/// This macro performs aggressive XML minification including: -/// - Removing comments and processing instructions -/// - Trimming and collapsing whitespace in text nodes -/// - Collapsing empty tags (`` → ``) -/// - Removing unnecessary whitespace between tags -/// -/// # Path Resolution -/// -/// File paths are resolved **relative to the source file** that invokes the macro. -/// This allows for intuitive usage where XML files can be placed next to the source code. 
-/// -/// For minifying XML string literals directly, see [`minified_xml_str!`]. -/// -/// # Examples -/// -/// ```ignore -/// // If you have this structure: -/// // src/ -/// // templates/ -/// // mod.rs -/// // document.xml -/// // -/// // In templates/mod.rs: -/// const TEMPLATE: &str = minified_xml!("document.xml"); -/// -/// // Or in parent directory: -/// const TEMPLATE: &str = minified_xml!("templates/document.xml"); -/// ``` -#[proc_macro] -pub fn minified_xml(input: TokenStream) -> TokenStream { - let file_path = input_to_string(input); - - // Get the source file location where the macro was called - let call_site = proc_macro::Span::call_site(); - let source_file = call_site.local_file().expect("Failed to get local file"); - let target_path = source_file - .parent() - .expect("Failed to get parent directory of calling file") - .join(Path::new(&file_path)); - - // Canonicalize to get absolute path (helps with error messages and change detection) - let canonical_path = target_path - .canonicalize() - .unwrap_or_else(|e| panic!("Failed to canonicalize file path '{}': {}", file_path, e)); - - // Read the XML file - let xml_content = fs::read_to_string(&canonical_path).expect("Failed to read XML file"); - - // Minify the XML - let minified = minify_xml(&xml_content) - .unwrap_or_else(|e| panic!("Failed to minify XML from '{}': {}", file_path, e)); - - let expanded = quote! { - #minified - }; - - // Generate the output token stream - TokenStream::from(expanded) -} - -/// Minifies an XML template and formats it with arguments at runtime, with compile-time optimizations -/// -/// This macro combines XML minification with optimized string formatting: -/// - Minifies the XML template at compile time -/// - Pre-calculates sizes of static parts -/// - Pre-allocates exact memory needed -/// - Avoids format! macro overhead through direct string building -/// -/// The syntax is similar to `format!`, but the template is minified first. 
-/// -/// # Formatting Syntax -/// -/// - `{}` - Positional argument (uses `Display` trait) -/// - `{0}`, `{1}`, ... - Indexed positional argument -/// - `{name}` - Named argument -/// -/// # Examples -/// -/// ```ignore -/// // Basic usage with positional arguments -/// let name = "document"; -/// let version = "1.0"; -/// let xml = minified_xml_format!(r#" -/// -/// -/// -/// {} -/// -/// "#, version, name); -/// // Result: document -/// -/// // With named arguments -/// let xml = minified_xml_format!( -/// r#"{name}{age}"#, -/// name = "Alice", -/// age = 30 -/// ); -/// // Result: Alice30 -/// ``` -#[proc_macro] -pub fn minified_xml_format(input: TokenStream) -> TokenStream { - // Parse the input tokens - let tokens: Vec = input.into_iter().collect(); - - if tokens.is_empty() { - panic!("minified_xml_format! requires at least a format string"); - } - - // Extract the format string (first argument) - let format_str_literal = &tokens[0]; - let TokenTree::Literal(lit) = format_str_literal else { - panic!("First argument must be a string literal"); - }; - - let template = literal_to_string(lit.to_string()); - - // Replace format placeholders with temporary markers before minification - // This prevents the XML parser from being confused by {} characters - let (template_with_markers, placeholder_map) = replace_placeholders_with_markers(&template); - - // Minify the XML template - let minified = minify_xml(&template_with_markers) - .unwrap_or_else(|e| panic!("Failed to minify XML template: {}", e)); - - // Restore the placeholders - let minified_with_placeholders = restore_placeholders_from_markers(&minified, &placeholder_map); - - // Parse the remaining arguments - let args = if tokens.len() > 1 { - // Skip the first token (format string) and the comma - let mut arg_tokens = Vec::new(); - let mut i = 1; - - // Skip comma after format string - if let Some(TokenTree::Punct(p)) = tokens.get(i) - && p.as_char() == ',' - { - i += 1; - } - - while i < tokens.len() { - 
arg_tokens.push(tokens[i].clone()); - i += 1; - } - - TokenStream::from_iter(arg_tokens) - } else { - TokenStream::new() - }; - - // Parse the minified template to find format placeholders and static parts - let parts = parse_format_string(&minified_with_placeholders); - - // Generate optimized code - generate_format_code(&parts, args) -} - -/// Replace format placeholders with unique markers that won't confuse the XML parser -/// Returns the modified string and a map of marker -> placeholder -fn replace_placeholders_with_markers(template: &str) -> (String, Vec) { - let mut result = String::with_capacity(template.len()); - let mut placeholders = Vec::new(); - let mut chars = template.chars().peekable(); - - while let Some(ch) = chars.next() { - if ch == '{' { - // Check for escaped brace {{ - if chars.peek() == Some(&'{') { - chars.next(); - result.push_str("{{"); - continue; - } - - // Parse the placeholder content - let mut placeholder_content = String::new(); - placeholder_content.push('{'); - - loop { - match chars.next() { - Some('}') => { - placeholder_content.push('}'); - break; - }, - Some(ch) => placeholder_content.push(ch), - None => { - // Unclosed placeholder, just add what we have - result.push_str(&placeholder_content); - return (result, placeholders); - }, - } - } - - // Create a unique marker - let marker = format!("__PLACEHOLDER_{}__", placeholders.len()); - placeholders.push(placeholder_content); - result.push_str(&marker); - } else if ch == '}' { - // Check for escaped brace }} - if chars.peek() == Some(&'}') { - chars.next(); - result.push_str("}}"); - } else { - result.push('}'); - } - } else { - result.push(ch); - } - } - - (result, placeholders) -} - -/// Restore the original placeholders from markers -fn restore_placeholders_from_markers(minified: &str, placeholders: &[String]) -> String { - let mut result = minified.to_string(); - - // Replace markers back with original placeholders - for (idx, placeholder) in 
placeholders.iter().enumerate() { - let marker = format!("__PLACEHOLDER_{}__", idx); - result = result.replace(&marker, placeholder); - } - - result -} - -/// Represents a part of a format string -#[derive(Debug, Clone)] -enum FormatPart { - /// Static text that doesn't need formatting - Static(String), - /// A format placeholder (either positional index or named argument) - Placeholder(PlaceholderType), -} - -/// Type of format placeholder -#[derive(Debug, Clone)] -enum PlaceholderType { - /// Positional argument by index (e.g., {0}, {1}) - Positional(usize), - /// Named argument (e.g., {name}) - Named(String), - /// Next positional argument (e.g., {}) - NextPositional, -} - -/// Parse a format string into static parts and placeholders -fn parse_format_string(template: &str) -> Vec { - let mut parts = Vec::new(); - let mut current_static = String::new(); - let mut chars = template.chars().peekable(); - - while let Some(ch) = chars.next() { - if ch == '{' { - // Check for escaped brace {{ - if chars.peek() == Some(&'{') { - chars.next(); - current_static.push('{'); - continue; - } - - // Save any accumulated static text - if !current_static.is_empty() { - parts.push(FormatPart::Static(current_static.clone())); - current_static.clear(); - } - - // Parse the placeholder content - let mut placeholder_content = String::new(); - loop { - match chars.next() { - Some('}') => break, - Some(ch) => placeholder_content.push(ch), - None => panic!("Unclosed format placeholder in template"), - } - } - - // Determine placeholder type - let placeholder = if placeholder_content.is_empty() { - PlaceholderType::NextPositional - } else if placeholder_content.chars().all(|c| c.is_ascii_digit()) { - PlaceholderType::Positional( - placeholder_content - .parse() - .expect("Invalid positional index"), - ) - } else { - PlaceholderType::Named(placeholder_content) - }; - - parts.push(FormatPart::Placeholder(placeholder)); - } else if ch == '}' { - // Check for escaped brace }} - if 
chars.peek() == Some(&'}') { - chars.next(); - current_static.push('}'); - } else { - panic!("Unmatched }} in format string"); - } - } else { - current_static.push(ch); - } - } - - // Add any remaining static text - if !current_static.is_empty() { - parts.push(FormatPart::Static(current_static)); - } - - parts -} - -/// Generate optimized formatting code -fn generate_format_code(parts: &[FormatPart], args: TokenStream) -> TokenStream { - use proc_macro::TokenTree as TT; - - // Parse arguments into positional and named - let mut positional_args = Vec::new(); - let mut named_args = std::collections::HashMap::new(); - - let arg_tokens: Vec = args.into_iter().collect(); - let mut i = 0; - - while i < arg_tokens.len() { - // Check if this is a named argument (ident = value) - if let Some(TT::Ident(name)) = arg_tokens.get(i) - && let Some(TT::Punct(punct)) = arg_tokens.get(i + 1) - && punct.as_char() == '=' - { - // Named argument - let name_str = name.to_string(); - let mut value_tokens = Vec::new(); - i += 2; // Skip name and = - - // Collect value tokens until comma or end - while i < arg_tokens.len() { - if let TT::Punct(p) = &arg_tokens[i] - && p.as_char() == ',' - { - i += 1; - break; - } - value_tokens.push(arg_tokens[i].clone()); - i += 1; - } - - named_args.insert(name_str, value_tokens); - continue; - } - - // Positional argument - let mut value_tokens = Vec::new(); - while i < arg_tokens.len() { - if let TT::Punct(p) = &arg_tokens[i] - && p.as_char() == ',' - { - i += 1; - break; - } - value_tokens.push(arg_tokens[i].clone()); - i += 1; - } - - if !value_tokens.is_empty() { - positional_args.push(value_tokens); - } - } - - // Calculate static size - let static_size: usize = parts - .iter() - .filter_map(|p| match p { - FormatPart::Static(s) => Some(s.len()), - _ => None, - }) - .sum(); - - // Generate code to build the string - build it as a string to avoid ToTokens issues - let mut code = format!( - "{{ let mut __result = 
::std::string::String::with_capacity({} + 32);", - static_size - ); - - let mut next_positional_idx = 0; - - for part in parts { - match part { - FormatPart::Static(text) => { - code.push_str(&format!("__result.push_str({:?});", text)); - }, - FormatPart::Placeholder(placeholder) => { - let arg_tokens = match placeholder { - PlaceholderType::NextPositional => { - if let Some(arg) = positional_args.get(next_positional_idx) { - next_positional_idx += 1; - arg - } else { - panic!("Not enough positional arguments"); - } - }, - PlaceholderType::Positional(idx) => { - if let Some(arg) = positional_args.get(*idx) { - arg - } else { - panic!("Positional argument {} not found", idx); - } - }, - PlaceholderType::Named(name) => { - if let Some(arg) = named_args.get(name) { - arg - } else { - panic!("Named argument '{}' not found", name); - } - }, - }; - - // Convert the token trees to a string representation - let arg_str: String = arg_tokens - .iter() - .map(|tt| tt.to_string()) - .collect::>() - .join(""); - - code.push_str(&format!( - "{{ use ::std::fmt::Write; let _ = write!(&mut __result, \"{{}}\", {}); }}", - arg_str - )); - }, - } - } - - code.push_str("__result }"); - - // Parse the generated code string back into a TokenStream - code.parse().expect("Failed to parse generated code") -} - -/// Handles the conversion between String Literals represented by TokenStream and Rust String type -/// -/// Thanks to https://github.com/scpso/const-css-minify, the code snippet below is nearly a Copy & Paste -fn input_to_string(input: TokenStream) -> String { - let token_trees: Vec<_> = input.into_iter().collect(); - if token_trees.len() != 1 { - panic!("Expected exactly one token tree, got {}", token_trees.len()); - } - let TokenTree::Literal(literal) = token_trees.first().unwrap() else { - panic!("Expected a string literal"); - }; - literal_to_string(literal.to_string()) -} - -/// Convert a literal token string to its actual string value -fn literal_to_string(mut literal: String) 
-> String { - // Unescape the raw string literal - if let Some(c) = literal.get(0..=0) - && c != "r" - { - literal = literal - .replace("\\\"", "\"") - .replace("\\n", "\n") - .replace("\\r", "\r") - .replace("\\t", "\t") - .replace("\\\\", "\\") - } - - // trim leading and trailing ".." or r#".."# from string literal - let start = &literal.find('\"').unwrap() + 1; - let end = &literal.rfind('\"').unwrap() - 1; - if start > end { - panic!("Invalid string literal"); - } - literal[start..=end].to_string() -} - -/// Minifies XML content by removing unnecessary whitespace, comments, and collapsing empty tags -/// -/// This implementation follows best practices for XML minification: -/// - Preserves XML declarations -/// - Removes comments and processing instructions -/// - Intelligently trims whitespace between elements -/// - Collapses empty element tags -/// - Handles CDATA sections properly -/// -/// # Performance -/// - Zero-copy where possible using `Cow<[u8]>` -/// - Single-pass processing -/// - Efficient buffer reuse -fn minify_xml(xml: &str) -> Result> { - let mut reader = Reader::from_str(xml); - reader.config_mut().trim_text(false); // We handle trimming ourselves for better control - - let mut output = Vec::with_capacity(xml.len() / 2); // Pre-allocate roughly half the size - let mut buf = Vec::new(); - - // Stack to track element names for collapsing empty tags - let mut tag_stack: Vec> = Vec::new(); - - loop { - match reader.read_event_into(&mut buf)? { - Event::Eof => { - // Flush any remaining buffered start tags before EOF - // (this can happen if the root element never closes in the stream) - for start_tag in tag_stack.drain(..) 
{ - output.push(b'<'); - output.extend_from_slice(start_tag.name().as_ref()); - write_attributes(&mut output, &start_tag)?; - output.push(b'>'); - } - break; - }, - - // Preserve XML declaration - write it as-is - Event::Decl(e) => { - output.extend_from_slice(b""); - }, - - // Skip comments - they're not needed in minified output - Event::Comment(_) => continue, - - // Skip processing instructions (except xml declaration handled above) - Event::PI(_) => continue, - - // Handle DOCTYPE declarations - preserve them - Event::DocType(e) => { - output.extend_from_slice(b"'); - }, - - // Handle start tags - buffer them to check if they can be collapsed - Event::Start(e) => { - // Clone the tag for our stack (we need owned data) - let owned = e.to_owned(); - tag_stack.push(owned); - }, - - // Handle empty tags - flush buffered tags first, then write - Event::Empty(e) => { - // Flush all buffered start tags since we have an empty element - let tags_to_flush = std::mem::take(&mut tag_stack); - for start_tag in tags_to_flush { - output.push(b'<'); - output.extend_from_slice(start_tag.name().as_ref()); - write_attributes(&mut output, &start_tag)?; - output.push(b'>'); - } - - // Now write the empty tag - output.push(b'<'); - output.extend_from_slice(e.name().as_ref()); - write_attributes(&mut output, &e)?; - output.extend_from_slice(b"/>"); - }, - - // Handle end tags - check if we can collapse with start tag - Event::End(e) => { - if let Some(start_tag) = tag_stack.pop() { - // Check if this end tag matches the last start tag - // If so, we can collapse to an empty tag - if start_tag.name() == e.name() { - // Before writing the collapsed tag, flush all other buffered start tags - // This ensures proper nesting: not - let remaining_tags = std::mem::take(&mut tag_stack); - for buffered_tag in remaining_tags { - output.push(b'<'); - output.extend_from_slice(buffered_tag.name().as_ref()); - write_attributes(&mut output, &buffered_tag)?; - output.push(b'>'); - } - - // Now write 
the collapsed tag - output.push(b'<'); - output.extend_from_slice(start_tag.name().as_ref()); - write_attributes(&mut output, &start_tag)?; - output.extend_from_slice(b"/>"); - } else { - // Tags don't match - we have content in between - // Flush all buffered tags - let mut all_tags = std::mem::take(&mut tag_stack); - all_tags.push(start_tag); - - for buffered_tag in all_tags { - output.push(b'<'); - output.extend_from_slice(buffered_tag.name().as_ref()); - write_attributes(&mut output, &buffered_tag)?; - output.push(b'>'); - } - - // Write the end tag - output.push(b'<'); - output.push(b'/'); - output.extend_from_slice(e.name().as_ref()); - output.push(b'>'); - } - } else { - // No matching start tag in our buffer - just write end tag - output.push(b'<'); - output.push(b'/'); - output.extend_from_slice(e.name().as_ref()); - output.push(b'>'); - } - }, - - // Handle text content - trim whitespace intelligently - Event::Text(e) => { - // Get the text content - let text = e.as_ref(); - - // Intelligently handle whitespace - // Skip pure whitespace between tags, otherwise trim both leading and trailing whitespace - // This is safe for most XML use cases where whitespace between elements is not significant - let trimmed = if is_whitespace_only(text) { - &[] - } else { - trim_whitespace(text) - }; - - // Only flush buffered start tags if we have non-whitespace text content - if !trimmed.is_empty() { - // Flush ALL buffered start tags since we have text content - // Use mem::take to efficiently move all elements out of the stack - let tags_to_flush = std::mem::take(&mut tag_stack); - for start_tag in tags_to_flush { - output.push(b'<'); - output.extend_from_slice(start_tag.name().as_ref()); - write_attributes(&mut output, &start_tag)?; - output.push(b'>'); - } - - output.extend_from_slice(trimmed); - } - }, - - // Preserve CDATA sections as-is (they may contain formatting-sensitive content) - Event::CData(e) => { - // Flush ALL buffered start tags in correct order - let 
tags_to_flush = std::mem::take(&mut tag_stack); - for start_tag in tags_to_flush { - output.push(b'<'); - output.extend_from_slice(start_tag.name().as_ref()); - write_attributes(&mut output, &start_tag)?; - output.push(b'>'); - } - - output.extend_from_slice(b""); - }, - - // Skip entity references - they'll be handled by the parser - // This case is for general entity references which are rare in modern XML - Event::GeneralRef(_) => continue, - } - - buf.clear(); - } - - // Flush any remaining buffered tags (shouldn't happen with valid XML) - for start_tag in tag_stack { - output.push(b'<'); - output.extend_from_slice(start_tag.name().as_ref()); - write_attributes(&mut output, &start_tag)?; - output.push(b'>'); - } - - let result = String::from_utf8(output)?; - Ok(result) -} - -/// Helper function to write attributes efficiently -#[inline] -fn write_attributes(output: &mut Vec, tag: &BytesStart) -> Result<(), quick_xml::Error> { - for attr in tag.attributes() { - let attr = attr?; - output.push(b' '); - output.extend_from_slice(attr.key.as_ref()); - output.extend_from_slice(b"=\""); - output.extend_from_slice(&attr.value); - output.push(b'"'); - } - Ok(()) -} - -/// Check if a byte slice contains only whitespace characters -#[inline] -fn is_whitespace_only(bytes: &[u8]) -> bool { - bytes - .iter() - .all(|&b| matches!(b, b' ' | b'\t' | b'\n' | b'\r')) -} - -/// Trim leading and trailing whitespace from byte slice -#[inline] -fn trim_whitespace(bytes: &[u8]) -> &[u8] { - let start = bytes - .iter() - .position(|&b| !matches!(b, b' ' | b'\t' | b'\n' | b'\r')) - .unwrap_or(bytes.len()); - - let end = bytes - .iter() - .rposition(|&b| !matches!(b, b' ' | b'\t' | b'\n' | b'\r')) - .map(|pos| pos + 1) - .unwrap_or(0); - - if start <= end { - &bytes[start..end] - } else { - &[] - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_minify_xml_basic() { - let input = r#" - - - - Text content - - - - "#; - - let minified = minify_xml(input).unwrap(); - - 
// Should remove extra whitespace and comments - assert!(!minified.contains(" - - Some text content - - - - - here]]> - - - - "#; - let minified = minify_xml(input).unwrap(); - - // Verify comment removal - assert!(!minified.contains(" + + + + + + +"#; + +const MINIFIED: &str = minified_xml_str!( + r#" + + + + + + + +"# +); + +fn main() { + println!("Input length: {} bytes", INPUT.len()); + println!("Minified length: {} bytes", MINIFIED.len()); + println!("Minified XML:"); + println!("{}", MINIFIED); +} From 4dd28658dc59f71cf39160c22c7b8f74570bcb34 Mon Sep 17 00:00:00 2001 From: Ryker Zhu Date: Sun, 31 May 2026 01:11:22 +0800 Subject: [PATCH 25/25] style: apply cargo fmt to all example files --- crates/litchi-cfb/examples/inspect_ole.rs | 6 +----- crates/litchi-core/examples/bom_demo.rs | 5 ++--- crates/litchi-core/examples/detect_format.rs | 2 +- crates/litchi-eval/examples/evaluate_simple.rs | 9 ++------- .../litchi-eval/examples/evaluate_with_table.rs | 15 +++------------ crates/litchi-imgconv/examples/metafile_to_svg.rs | 12 +++--------- crates/litchi-iwa/examples/extract_structured.rs | 6 ++---- crates/litchi-iwa/examples/read_iwork.rs | 6 ++---- crates/litchi-markdown/examples/style_options.rs | 10 +++++----- crates/litchi-odf/examples/read_odt.rs | 6 +----- crates/litchi-ole/examples/read_xls.rs | 3 ++- crates/litchi-ooxml/examples/read_xlsx.rs | 5 ++++- crates/litchi-ooxml/examples/write_docx.rs | 4 +++- crates/litchi-opc/examples/extract_part.rs | 12 ++++++------ crates/litchi-opc/examples/inspect_package.rs | 4 +--- crates/litchi-opc/examples/pack_uri_demo.rs | 2 +- crates/litchi-opc/examples/write_package.rs | 3 +-- crates/litchi-rtf/examples/compressed_rtf.rs | 5 ++++- crates/soapberry-zip/examples/extract_entry.rs | 5 ++++- crates/soapberry-zip/examples/list_archive.rs | 5 +---- 20 files changed, 49 insertions(+), 76 deletions(-) diff --git a/crates/litchi-cfb/examples/inspect_ole.rs b/crates/litchi-cfb/examples/inspect_ole.rs index 019e692..47e5fdb 
100644 --- a/crates/litchi-cfb/examples/inspect_ole.rs +++ b/crates/litchi-cfb/examples/inspect_ole.rs @@ -46,11 +46,7 @@ fn main() -> ExampleResult<()> { }; probe_buf.truncate(n); if !is_ole_file(&probe_buf) { - return Err(format!( - "Not a CFB/OLE2 file (magic mismatch): {}", - path.display() - ) - .into()); + return Err(format!("Not a CFB/OLE2 file (magic mismatch): {}", path.display()).into()); } println!("Signature OK: D0 CF 11 E0 A1 B1 1A E1"); diff --git a/crates/litchi-core/examples/bom_demo.rs b/crates/litchi-core/examples/bom_demo.rs index 93605ba..1dc9da3 100644 --- a/crates/litchi-core/examples/bom_demo.rs +++ b/crates/litchi-core/examples/bom_demo.rs @@ -14,8 +14,7 @@ //! No CLI arguments are required — the example is fully self-contained. use litchi_core::{ - BomKind, UTF8_BOM, UTF16_BE_BOM, UTF16_LE_BOM, UTF32_BE_BOM, UTF32_LE_BOM, strip_bom, - write_bom, + BomKind, UTF8_BOM, UTF16_BE_BOM, UTF16_LE_BOM, UTF32_BE_BOM, UTF32_LE_BOM, strip_bom, write_bom, }; use std::io::Cursor; @@ -107,7 +106,7 @@ fn demo_round_trip( found, consumed ); assert_eq!(found, kind, "round trip mismatch for {:?}", kind); - } + }, None => println!(" detected BOM : "), } println!( diff --git a/crates/litchi-core/examples/detect_format.rs b/crates/litchi-core/examples/detect_format.rs index e6edf69..1d342dc 100644 --- a/crates/litchi-core/examples/detect_format.rs +++ b/crates/litchi-core/examples/detect_format.rs @@ -28,7 +28,7 @@ fn main() -> Result<(), Box> { None => { eprintln!("usage: detect_format "); std::process::exit(2); - } + }, }; let path = Path::new(&target); diff --git a/crates/litchi-eval/examples/evaluate_simple.rs b/crates/litchi-eval/examples/evaluate_simple.rs index 13f9752..ce4cdfe 100644 --- a/crates/litchi-eval/examples/evaluate_simple.rs +++ b/crates/litchi-eval/examples/evaluate_simple.rs @@ -15,8 +15,7 @@ use std::borrow::Cow; use std::collections::HashMap; use litchi_core::sheet::{ - Cell, CellIterator, CellValue, Result, RowIterator, WorkbookTrait, 
Worksheet, - WorksheetIterator, + Cell, CellIterator, CellValue, Result, RowIterator, WorkbookTrait, Worksheet, WorksheetIterator, }; use litchi_eval::FormulaEvaluator; @@ -162,11 +161,7 @@ impl Worksheet for MemSheet { fn cell(&self, row: u32, column: u32) -> Result> { let value = self.cells.get(&(row, column)).unwrap_or(CellValue::EMPTY); - Ok(Box::new(MemCell { - row, - column, - value, - })) + Ok(Box::new(MemCell { row, column, value })) } fn cell_by_coordinate(&self, _coordinate: &str) -> Result> { diff --git a/crates/litchi-eval/examples/evaluate_with_table.rs b/crates/litchi-eval/examples/evaluate_with_table.rs index 7da6c7c..b385d7c 100644 --- a/crates/litchi-eval/examples/evaluate_with_table.rs +++ b/crates/litchi-eval/examples/evaluate_with_table.rs @@ -14,8 +14,7 @@ use std::borrow::Cow; use std::collections::HashMap; use litchi_core::sheet::{ - Cell, CellIterator, CellValue, Result, RowIterator, WorkbookTrait, Worksheet, - WorksheetIterator, + Cell, CellIterator, CellValue, Result, RowIterator, WorkbookTrait, Worksheet, WorksheetIterator, }; use litchi_eval::{FormulaEvaluator, TableConfig}; @@ -143,11 +142,7 @@ impl Worksheet for MemSheet { fn cell(&self, row: u32, column: u32) -> Result> { let value = self.cells.get(&(row, column)).unwrap_or(CellValue::EMPTY); - Ok(Box::new(MemCell { - row, - column, - value, - })) + Ok(Box::new(MemCell { row, column, value })) } fn cell_by_coordinate(&self, _coordinate: &str) -> Result> { @@ -320,11 +315,7 @@ async fn main() -> std::result::Result<(), Box Result<(), Box> { Some("wmf") => wmf::convert_wmf_to_svg(&bytes)?, Some("pict" | "pct") => { // The pict module currently has no convert_pict_to_svg; raster-only. 
- return Err( - "PICT to SVG conversion is not exposed by litchi-imgconv; \ + return Err("PICT to SVG conversion is not exposed by litchi-imgconv; \ use convert_pict (raster) instead" - .into(), - ); + .into()); }, other => { - return Err(format!( - "unsupported extension {:?}; expected .emf or .wmf", - other - ) - .into()); + return Err(format!("unsupported extension {:?}; expected .emf or .wmf", other).into()); }, }; diff --git a/crates/litchi-iwa/examples/extract_structured.rs b/crates/litchi-iwa/examples/extract_structured.rs index 6050659..0b85d5a 100644 --- a/crates/litchi-iwa/examples/extract_structured.rs +++ b/crates/litchi-iwa/examples/extract_structured.rs @@ -30,15 +30,13 @@ fn main() -> Result<(), Box> { ); eprintln!("or point at any Numbers document on disk."); return Ok(()); - } + }, }; let path = Path::new(&path); if !path.exists() { eprintln!("file not found: {}", path.display()); - eprintln!( - "Numbers test fixtures are not committed; please supply a real .numbers path." - ); + eprintln!("Numbers test fixtures are not committed; please supply a real .numbers path."); return Ok(()); } diff --git a/crates/litchi-iwa/examples/read_iwork.rs b/crates/litchi-iwa/examples/read_iwork.rs index abf244d..22b96e9 100644 --- a/crates/litchi-iwa/examples/read_iwork.rs +++ b/crates/litchi-iwa/examples/read_iwork.rs @@ -31,15 +31,13 @@ fn main() -> Result<(), Box> { ); eprintln!("or point at any iWork bundle on disk."); return Ok(()); - } + }, }; let path = Path::new(&path); if !path.exists() { eprintln!("file not found: {}", path.display()); - eprintln!( - "iWork test fixtures are not committed; please supply a real document path." 
- ); + eprintln!("iWork test fixtures are not committed; please supply a real document path."); return Ok(()); } diff --git a/crates/litchi-markdown/examples/style_options.rs b/crates/litchi-markdown/examples/style_options.rs index 5d7be39..280c85b 100644 --- a/crates/litchi-markdown/examples/style_options.rs +++ b/crates/litchi-markdown/examples/style_options.rs @@ -48,11 +48,11 @@ impl ToMarkdown for MathSnippet { ScriptStyle::Html => { writeln!(out, "x{}", self.subscript).unwrap(); writeln!(out, "x{}", self.superscript).unwrap(); - } + }, ScriptStyle::Unicode => { writeln!(out, "x{}", convert_to_subscript(&self.subscript)).unwrap(); writeln!(out, "x{}", convert_to_superscript(&self.superscript)).unwrap(); - } + }, } // --- Formula rendering ------------------------------------------------ @@ -73,7 +73,7 @@ impl ToMarkdown for MathSnippet { writeln!(out, "| Header |").unwrap(); writeln!(out, "|--------|").unwrap(); writeln!(out, "| {} |", self.cell).unwrap(); - } + }, TableStyle::MinimalHtml => { writeln!( out, @@ -81,7 +81,7 @@ impl ToMarkdown for MathSnippet { self.cell ) .unwrap(); - } + }, TableStyle::StyledHtml => { let pad = " ".repeat(options.html_table_indent); writeln!(out, "
").unwrap(); @@ -92,7 +92,7 @@ impl ToMarkdown for MathSnippet { writeln!(out, "{pad}{pad}", self.cell).unwrap(); writeln!(out, "{pad}").unwrap(); writeln!(out, "
{}
").unwrap(); - } + }, } Ok(out) diff --git a/crates/litchi-odf/examples/read_odt.rs b/crates/litchi-odf/examples/read_odt.rs index c3decc9..1227890 100644 --- a/crates/litchi-odf/examples/read_odt.rs +++ b/crates/litchi-odf/examples/read_odt.rs @@ -25,11 +25,7 @@ fn main() -> Result<(), Box> { builder.add_heading("litchi-odf example", 1)?; builder.add_paragraph("This document was created by the read_odt example.")?; builder.add_paragraph("It demonstrates a simple build-then-read round trip.")?; - builder.add_bulleted_list(vec![ - "First bullet", - "Second bullet", - "Third bullet", - ])?; + builder.add_bulleted_list(vec!["First bullet", "Second bullet", "Third bullet"])?; builder.add_heading("Conclusion", 2)?; builder.add_paragraph("Reading round-trips text content successfully.")?; // `save` consumes the builder, so use the tempfile path explicitly. diff --git a/crates/litchi-ole/examples/read_xls.rs b/crates/litchi-ole/examples/read_xls.rs index 5b43e76..9c147e2 100644 --- a/crates/litchi-ole/examples/read_xls.rs +++ b/crates/litchi-ole/examples/read_xls.rs @@ -44,7 +44,8 @@ fn main() -> Result<(), Box> { let mut shown = 0usize; let mut iter = sheet.cells(); while let Some(cell_result) = iter.next() { - let cell = cell_result.map_err(|e| -> Box { e.to_string().into() })?; + let cell = + cell_result.map_err(|e| -> Box { e.to_string().into() })?; if cell.is_empty() { continue; } diff --git a/crates/litchi-ooxml/examples/read_xlsx.rs b/crates/litchi-ooxml/examples/read_xlsx.rs index f40446a..a44cb58 100644 --- a/crates/litchi-ooxml/examples/read_xlsx.rs +++ b/crates/litchi-ooxml/examples/read_xlsx.rs @@ -42,7 +42,10 @@ fn main() -> Result<(), Box> { println!("\n--- Sheet [{}]: {:?} ---", index, ws.name()); let dims = ws.dimensions(); - println!("dimensions (min_row, min_col, max_row, max_col): {:?}", dims); + println!( + "dimensions (min_row, min_col, max_row, max_col): {:?}", + dims + ); let Some((min_r, min_c, max_r, max_c)) = dims else { println!("(empty 
sheet)"); diff --git a/crates/litchi-ooxml/examples/write_docx.rs b/crates/litchi-ooxml/examples/write_docx.rs index 5ff77ce..23d57b5 100644 --- a/crates/litchi-ooxml/examples/write_docx.rs +++ b/crates/litchi-ooxml/examples/write_docx.rs @@ -38,7 +38,9 @@ fn main() -> Result<(), Box> { para.add_run_with_text(", "); para.add_run_with_text("italic").italic(true); para.add_run_with_text(", and "); - para.add_run_with_text("bold-italic").bold(true).italic(true); + para.add_run_with_text("bold-italic") + .bold(true) + .italic(true); para.add_run_with_text(" runs."); // Small 2x2 table with a header row. diff --git a/crates/litchi-opc/examples/extract_part.rs b/crates/litchi-opc/examples/extract_part.rs index b183ef9..4f57fc6 100644 --- a/crates/litchi-opc/examples/extract_part.rs +++ b/crates/litchi-opc/examples/extract_part.rs @@ -28,10 +28,10 @@ fn main() -> Result<(), Box> { let pkg_path: PathBuf = args .next() .map(PathBuf::from) - .unwrap_or_else(|| { - PathBuf::from("test-data/ooxml/docx/documentProperties.docx") - }); - let partname_str = args.next().unwrap_or_else(|| "/word/document.xml".to_string()); + .unwrap_or_else(|| PathBuf::from("test-data/ooxml/docx/documentProperties.docx")); + let partname_str = args + .next() + .unwrap_or_else(|| "/word/document.xml".to_string()); let out_path: Option = args.next().map(PathBuf::from); // Demonstrate PackURI parsing and inspection. @@ -69,14 +69,14 @@ fn main() -> Result<(), Box> { Some(path) => { std::fs::write(&path, blob)?; eprintln!("Wrote {} bytes to {}", blob.len(), path.display()); - } + }, None => { // Stream the blob to stdout. Use a locked handle for efficiency. 
let stdout = std::io::stdout(); let mut handle = stdout.lock(); handle.write_all(blob)?; handle.flush()?; - } + }, } Ok(()) diff --git a/crates/litchi-opc/examples/inspect_package.rs b/crates/litchi-opc/examples/inspect_package.rs index bfe0b61..b64ae26 100644 --- a/crates/litchi-opc/examples/inspect_package.rs +++ b/crates/litchi-opc/examples/inspect_package.rs @@ -17,9 +17,7 @@ fn main() -> Result<(), Box> { let path: PathBuf = std::env::args() .nth(1) .map(PathBuf::from) - .unwrap_or_else(|| { - PathBuf::from("test-data/ooxml/docx/documentProperties.docx") - }); + .unwrap_or_else(|| PathBuf::from("test-data/ooxml/docx/documentProperties.docx")); println!("Opening OPC package: {}", path.display()); let pkg = OpcPackage::open(&path)?; diff --git a/crates/litchi-opc/examples/pack_uri_demo.rs b/crates/litchi-opc/examples/pack_uri_demo.rs index b2f2810..e573e3a 100644 --- a/crates/litchi-opc/examples/pack_uri_demo.rs +++ b/crates/litchi-opc/examples/pack_uri_demo.rs @@ -33,7 +33,7 @@ fn main() -> Result<(), Box> { uri.ext(), uri.idx(), ); - } + }, Err(e) => println!(" {input:?} -> ERROR: {e}"), } } diff --git a/crates/litchi-opc/examples/write_package.rs b/crates/litchi-opc/examples/write_package.rs index 024b206..e949df5 100644 --- a/crates/litchi-opc/examples/write_package.rs +++ b/crates/litchi-opc/examples/write_package.rs @@ -19,8 +19,7 @@ fn main() -> Result<(), Box> { // --- 1. 
Build the package in memory -------------------------------- let mut pkg = OpcPackage::new(); - let partname = PackURI::new("/custom/data.xml") - .map_err(|e| format!("invalid PackURI: {e}"))?; + let partname = PackURI::new("/custom/data.xml").map_err(|e| format!("invalid PackURI: {e}"))?; let xml = br#" world diff --git a/crates/litchi-rtf/examples/compressed_rtf.rs b/crates/litchi-rtf/examples/compressed_rtf.rs index 873ed27..35f650d 100644 --- a/crates/litchi-rtf/examples/compressed_rtf.rs +++ b/crates/litchi-rtf/examples/compressed_rtf.rs @@ -50,7 +50,10 @@ This text is repeated.\\par}"; println!("\nUncompressed framing"); println!("{}", "-".repeat(60)); println!("Stored payload size: {} bytes", stored.len()); - println!("is_compressed_rtf(stored) -> {}", is_compressed_rtf(&stored)); + println!( + "is_compressed_rtf(stored) -> {}", + is_compressed_rtf(&stored) + ); let stored_round_trip = decompress(&stored)?; assert_eq!( diff --git a/crates/soapberry-zip/examples/extract_entry.rs b/crates/soapberry-zip/examples/extract_entry.rs index 936288b..4c3331a 100644 --- a/crates/soapberry-zip/examples/extract_entry.rs +++ b/crates/soapberry-zip/examples/extract_entry.rs @@ -53,7 +53,10 @@ fn main() -> Result<(), Box> { print!("{}", preview); if bytes.len() > MAX_PRINT_BYTES { println!(); - println!("... ({} more bytes truncated)", bytes.len() - MAX_PRINT_BYTES); + println!( + "... ({} more bytes truncated)", + bytes.len() - MAX_PRINT_BYTES + ); } else { println!(); } diff --git a/crates/soapberry-zip/examples/list_archive.rs b/crates/soapberry-zip/examples/list_archive.rs index 1c96021..49ac32e 100644 --- a/crates/soapberry-zip/examples/list_archive.rs +++ b/crates/soapberry-zip/examples/list_archive.rs @@ -40,10 +40,7 @@ fn main() -> Result<(), Box> { // sizes per entry, including directory entries. 
let slice_archive = ZipArchive::from_slice(&data)?; println!(); - println!( - "{:<60} {:>12} {:>12}", - "name", "comp size", "uncomp size" - ); + println!("{:<60} {:>12} {:>12}", "name", "comp size", "uncomp size"); println!("{}", "-".repeat(90)); for entry_result in slice_archive.entries() { let entry = entry_result?;