From bd92e8f643b7164a099c40250146497d5424f149 Mon Sep 17 00:00:00 2001 From: Brett Langdon Date: Wed, 24 Jun 2026 12:43:40 -0400 Subject: [PATCH 1/6] fix(data-pipeline): ensure large span strings get truncated before encoding --- libdd-data-pipeline/src/trace_exporter/mod.rs | 4 + .../src/trace_exporter/trace_serializer.rs | 170 +++++++++++ libdd-trace-utils/src/span/mod.rs | 49 +++ libdd-trace-utils/src/span/trace_utils.rs | 281 +++++++++++++++++- 4 files changed, 502 insertions(+), 2 deletions(-) diff --git a/libdd-data-pipeline/src/trace_exporter/mod.rs b/libdd-data-pipeline/src/trace_exporter/mod.rs index 554c88efb8..da8f0b26bf 100644 --- a/libdd-data-pipeline/src/trace_exporter/mod.rs +++ b/libdd-data-pipeline/src/trace_exporter/mod.rs @@ -643,6 +643,10 @@ impl Tra ) -> Result { let mut header_tags: TracerHeaderTags = self.metadata.borrow().into(); + // Truncate over-long string fields before any downstream processing so that stats, + // serialization, and the OTLP path all operate on the same normalized payload. + libdd_trace_utils::span::trace_utils::truncate_span_strings(&mut traces); + // Process stats computation and drop non-sampled (p0) chunks. // This must run before the OTLP path so that unsampled spans are not exported. 
stats::process_traces_for_stats( diff --git a/libdd-data-pipeline/src/trace_exporter/trace_serializer.rs b/libdd-data-pipeline/src/trace_exporter/trace_serializer.rs index 7cfd62a5a0..9dc1e3fe88 100644 --- a/libdd-data-pipeline/src/trace_exporter/trace_serializer.rs +++ b/libdd-data-pipeline/src/trace_exporter/trace_serializer.rs @@ -460,4 +460,174 @@ mod tests { assert!(!headers.contains_key("datadog-client-computed-stats")); assert!(headers.contains_key("datadog-client-computed-top-level")); } + + // ----------------------------------------------------------------------- + // Truncation end-to-end regression tests + // + // These tests verify that over-long string fields survive the full + // truncate → encode → decode round-trip correctly. They mirror the + // dd-trace-py snapshot tests for + // `test_encode_span_with_large_string_attributes` (ASCII) and + // `test_encode_span_with_large_unicode_string_attributes` (multi-byte). + // ----------------------------------------------------------------------- + + use libdd_trace_utils::span::trace_utils::{ + truncate_span_strings, MAX_SPAN_STRING_LEN, TRUNCATED_SPAN_STRING_LEN, + }; + + const TRUNCATION_SUFFIX: &str = "..."; + + fn long_bytes_string(c: char, n: usize) -> BytesString { + BytesString::from_string(std::iter::repeat_n(c, n).collect()) + } + + /// Build a span whose `resource`, one meta key, and one meta value are + /// each at interesting boundary lengths, matching the dd-trace-py snapshot + /// test fixture: + /// - name: 25 000 'a' chars → exactly at the limit → NOT truncated + /// - resource: 25 001 'b' chars → one over the limit → truncated to 2 500 + /// - meta key: 25 001 'c' chars → truncated to 2 500 + /// - meta value: 2 000 'd' chars → well under limit → unchanged + fn create_large_string_span() -> SpanBytes { + SpanBytes { + name: long_bytes_string('a', MAX_SPAN_STRING_LEN), + resource: long_bytes_string('b', MAX_SPAN_STRING_LEN + 1), + service: BytesString::from_slice(b"svc").unwrap(), + meta: 
vec![( + long_bytes_string('c', MAX_SPAN_STRING_LEN + 1), + long_bytes_string('d', 2_000), + )] + .into(), + span_id: 1, + trace_id: 1, + start: 1_000_000, + duration: 1_000, + ..Default::default() + } + } + + fn assert_truncation_invariants(span: &libdd_trace_utils::span::v04::SpanBytes) { + // name at exactly the limit — must be unchanged + assert_eq!( + span.name.as_str().chars().count(), + MAX_SPAN_STRING_LEN, + "name should not be truncated" + ); + + // resource one over the limit — must be truncated + assert_eq!( + span.resource.as_str().chars().count(), + TRUNCATED_SPAN_STRING_LEN, + "resource should be truncated to {TRUNCATED_SPAN_STRING_LEN}" + ); + assert!( + span.resource.as_str().ends_with(TRUNCATION_SUFFIX), + "truncated resource must end with the suffix" + ); + + // meta: key was over the limit, value was under + let (k, v) = span.meta.iter().next().expect("meta should be non-empty"); + assert_eq!( + k.as_str().chars().count(), + TRUNCATED_SPAN_STRING_LEN, + "meta key should be truncated" + ); + assert!(k.as_str().ends_with(TRUNCATION_SUFFIX)); + assert_eq!( + v.as_str().chars().count(), + 2_000, + "meta value under limit must be unchanged" + ); + } + + #[test] + fn test_truncation_survives_v04_encode_decode_round_trip() { + let serializer = TraceSerializer::new(); + let mut traces = vec![vec![create_large_string_span()]]; + + truncate_span_strings(&mut traces); + + let payload = serializer + .collect_and_process_traces(traces, TraceExporterOutputFormat::V04) + .unwrap(); + let serialized = serializer + .serialize_payload(&payload, &TracerMetadata::default()) + .unwrap(); + + let (decoded, _) = + libdd_trace_utils::msgpack_decoder::v04::from_slice(&serialized).unwrap(); + assert_eq!(decoded.len(), 1); + assert_eq!(decoded[0].len(), 1); + + // Decoded spans use &str (SliceData); re-check lengths via char count. 
+ let span = &decoded[0][0]; + assert_eq!(span.name.chars().count(), MAX_SPAN_STRING_LEN); + assert_eq!(span.resource.chars().count(), TRUNCATED_SPAN_STRING_LEN); + assert!(span.resource.ends_with(TRUNCATION_SUFFIX)); + let (k, v) = span.meta.iter().next().unwrap(); + assert_eq!(k.chars().count(), TRUNCATED_SPAN_STRING_LEN); + assert_eq!(v.chars().count(), 2_000); + } + + #[test] + fn test_truncation_survives_v05_encode_decode_round_trip() { + let serializer = TraceSerializer::new(); + let mut traces = vec![vec![create_large_string_span()]]; + + truncate_span_strings(&mut traces); + + // Verify truncation happened in memory before we encode. + assert_truncation_invariants(&traces[0][0]); + + let payload = serializer + .collect_and_process_traces(traces, TraceExporterOutputFormat::V05) + .unwrap(); + let serialized = serializer + .serialize_payload(&payload, &TracerMetadata::default()) + .unwrap(); + + let (decoded, _) = + libdd_trace_utils::msgpack_decoder::v05::from_slice(&serialized).unwrap(); + assert_eq!(decoded.len(), 1); + assert_eq!(decoded[0].len(), 1); + + let span = &decoded[0][0]; + assert_eq!(span.name.chars().count(), MAX_SPAN_STRING_LEN); + assert_eq!(span.resource.chars().count(), TRUNCATED_SPAN_STRING_LEN); + assert!(span.resource.ends_with(TRUNCATION_SUFFIX)); + let (k, v) = span.meta.iter().next().unwrap(); + assert_eq!(k.chars().count(), TRUNCATED_SPAN_STRING_LEN); + assert_eq!(v.chars().count(), 2_000); + } + + #[test] + fn test_truncation_unicode_survives_v04_encode_decode_round_trip() { + // Each '€' is 3 bytes; 25 001 euros → truncated to 2 500 code points. 
+ let serializer = TraceSerializer::new(); + let mut traces = vec![vec![SpanBytes { + name: long_bytes_string('€', MAX_SPAN_STRING_LEN + 1), + resource: BytesString::from_slice(b"r").unwrap(), + service: BytesString::from_slice(b"svc").unwrap(), + span_id: 1, + trace_id: 1, + start: 1_000_000, + duration: 1_000, + ..Default::default() + }]]; + + truncate_span_strings(&mut traces); + + let payload = serializer + .collect_and_process_traces(traces, TraceExporterOutputFormat::V04) + .unwrap(); + let serialized = serializer + .serialize_payload(&payload, &TracerMetadata::default()) + .unwrap(); + + let (decoded, _) = + libdd_trace_utils::msgpack_decoder::v04::from_slice(&serialized).unwrap(); + let name = decoded[0][0].name; + assert_eq!(name.chars().count(), TRUNCATED_SPAN_STRING_LEN); + assert!(name.ends_with(TRUNCATION_SUFFIX)); + } } diff --git a/libdd-trace-utils/src/span/mod.rs b/libdd-trace-utils/src/span/mod.rs index 1a122efe99..4859e5f44c 100644 --- a/libdd-trace-utils/src/span/mod.rs +++ b/libdd-trace-utils/src/span/mod.rs @@ -24,18 +24,67 @@ use std::{fmt, ptr}; /// from a static str and check if the string is empty. pub trait SpanText: Debug + Eq + Hash + Borrow + Serialize + Default { fn from_static_str(value: &'static str) -> Self; + + /// If `self` exceeds `max_chars` Unicode code points, return a new value consisting of the + /// first `result_chars - suffix.chars().count()` code points followed by `suffix`; otherwise + /// return `self` unchanged. + /// + /// Implementations that cannot allocate (e.g. `&str`) return `self` unmodified. + fn maybe_truncate(self, max_chars: usize, result_chars: usize, suffix: &str) -> Self; } impl SpanText for &str { fn from_static_str(value: &'static str) -> Self { value } + + fn maybe_truncate(self, _max_chars: usize, _result_chars: usize, _suffix: &str) -> Self { + // &str is borrowed; allocation is impossible, so truncation is silently skipped. 
+ // The only path that produces &str spans is the zero-copy msgpack decoder + // (SpanSlice / SliceData), whose callers must already enforce length limits upstream. + self + } } impl SpanText for BytesString { fn from_static_str(value: &'static str) -> Self { BytesString::from_static(value) } + + fn maybe_truncate(self, max_chars: usize, result_chars: usize, suffix: &str) -> Self { + let s = self.as_str(); + // Fast path: UTF-8 byte length >= char count, so byte length within limit ⇒ chars fit. + if s.len() <= max_chars { + return self; + } + // Single pass: find the byte offset of char `keep_chars` and count total chars together, + // avoiding a separate O(n) `chars().count()` scan followed by another `char_indices()` walk. + let suffix_chars = suffix.chars().count(); + debug_assert!( + result_chars >= suffix_chars, + "result_chars ({result_chars}) must be >= suffix length ({suffix_chars})" + ); + let keep_chars = result_chars.saturating_sub(suffix_chars); + let mut keep_byte_end = None; + let mut total_chars = 0usize; + for (byte_pos, _) in s.char_indices() { + if total_chars == keep_chars { + keep_byte_end = Some(byte_pos); + } + total_chars += 1; + if total_chars > max_chars { + break; + } + } + if total_chars <= max_chars { + return self; + } + let end = keep_byte_end.unwrap_or(s.len()); + let mut truncated = String::with_capacity(end + suffix.len()); + truncated.push_str(&s[..end]); + truncated.push_str(suffix); + BytesString::from_string(truncated) + } } pub trait SpanBytes: Debug + Eq + Hash + Borrow<[u8]> + Serialize + Default { diff --git a/libdd-trace-utils/src/span/trace_utils.rs b/libdd-trace-utils/src/span/trace_utils.rs index 60790aa3cb..56908979a8 100644 --- a/libdd-trace-utils/src/span/trace_utils.rs +++ b/libdd-trace-utils/src/span/trace_utils.rs @@ -5,9 +5,120 @@ use tracing::debug; -use super::{v04::Span, SpanText, TraceData}; +use super::{ + v04::{AttributeAnyValue, AttributeArrayValue, Span}, + SpanText, TraceData, +}; use 
std::collections::{HashMap, HashSet}; +/// Fields whose Unicode code-point count exceeds this threshold are truncated. +pub const MAX_SPAN_STRING_LEN: usize = 25_000; +/// Length (in Unicode code points) to which over-long fields are truncated, including the suffix. +pub const TRUNCATED_SPAN_STRING_LEN: usize = 2_500; +/// Suffix appended to every truncated field. +const TRUNCATION_SUFFIX: &str = "..."; + +/// Truncate all text fields in every span across all trace chunks. +/// +/// Any field whose Unicode code-point count exceeds [`MAX_SPAN_STRING_LEN`] is replaced with +/// the first `TRUNCATED_SPAN_STRING_LEN - 3` code points followed by `"..."`, +/// giving a total of [`TRUNCATED_SPAN_STRING_LEN`] code points. Numeric fields and +/// `meta_struct` bytes are left untouched. +pub fn truncate_span_strings(traces: &mut [Vec>]) { + for chunk in traces.iter_mut() { + for span in chunk.iter_mut() { + truncate_span(span); + } + } +} + +fn trunc(v: S) -> S { + v.maybe_truncate( + MAX_SPAN_STRING_LEN, + TRUNCATED_SPAN_STRING_LEN, + TRUNCATION_SUFFIX, + ) +} + +fn trunc_in_place(field: &mut S) { + *field = trunc(std::mem::take(field)); +} + +fn truncate_attribute_value(v: AttributeAnyValue) -> AttributeAnyValue { + match v { + AttributeAnyValue::SingleValue(AttributeArrayValue::String(s)) => { + AttributeAnyValue::SingleValue(AttributeArrayValue::String(trunc(s))) + } + AttributeAnyValue::Array(vec) => AttributeAnyValue::Array( + vec.into_iter() + .map(|item| match item { + AttributeArrayValue::String(s) => AttributeArrayValue::String(trunc(s)), + other => other, + }) + .collect(), + ), + other => other, + } +} + +fn truncate_span(span: &mut Span) { + trunc_in_place(&mut span.service); + trunc_in_place(&mut span.name); + trunc_in_place(&mut span.resource); + trunc_in_place(&mut span.r#type); + + // If truncation makes two keys identical, the downstream span.dedup() call keeps the + // last original entry (VecMap dedup semantics). 
This mirrors the backend's own behavior + // when a tracer submits a span with duplicate keys. + for (key, value) in span.meta.iter_mut() { + trunc_in_place(key); + trunc_in_place(value); + } + + for (key, _value) in span.metrics.iter_mut() { + trunc_in_place(key); + } + + for (key, _value) in span.meta_struct.iter_mut() { + trunc_in_place(key); + } + + if !span.span_links.is_empty() { + span.span_links = std::mem::take(&mut span.span_links) + .into_iter() + .map(|mut link| { + trunc_in_place(&mut link.tracestate); + // Use entry API so that if truncation maps two originally-distinct keys to the + // same string, the first entry's value is kept and the second is dropped without + // allocating a truncated value for it. + let mut new_attrs = HashMap::with_capacity(link.attributes.len()); + for (k, v) in std::mem::take(&mut link.attributes) { + new_attrs.entry(trunc(k)).or_insert_with(|| trunc(v)); + } + link.attributes = new_attrs; + link + }) + .collect(); + } + + if !span.span_events.is_empty() { + span.span_events = std::mem::take(&mut span.span_events) + .into_iter() + .map(|mut event| { + trunc_in_place(&mut event.name); + let mut new_attrs = HashMap::with_capacity(event.attributes.len()); + for (k, v) in std::mem::take(&mut event.attributes) { + new_attrs + .entry(trunc(k)) + .or_insert_with(|| truncate_attribute_value(v)); + } + event.attributes = new_attrs; + event + }) + .collect(); + } +} + /// Span metric the mini agent must set for the backend to recognize top level span const TOP_LEVEL_KEY: &str = "_top_level"; /// Span metric the tracer sets to denote a top level span @@ -205,7 +316,10 @@ where #[cfg(test)] mod tests { use super::*; - use crate::span::v04::{SpanBytes, VecMap}; + use crate::span::v04::{ + AttributeAnyValue, AttributeArrayValue, SpanBytes, SpanEvent, SpanLink, VecMap, + }; + use std::collections::HashMap; fn create_test_span( trace_id: u64, @@ -437,4 +551,167 @@ mod tests { } } } + + // 
----------------------------------------------------------------------- + // truncate_span_strings tests + // ----------------------------------------------------------------------- + + fn long_str(c: char, n: usize) -> String { + std::iter::repeat_n(c, n).collect() + } + + fn bs(s: &str) -> libdd_tinybytes::BytesString { + libdd_tinybytes::BytesString::from_string(s.to_string()) + } + + fn make_span(name: &str, resource: &str, meta_key: &str, meta_val: &str) -> SpanBytes { + SpanBytes { + name: bs(name), + resource: bs(resource), + meta: vec![(bs(meta_key), bs(meta_val))].into(), + ..Default::default() + } + } + + #[test] + fn test_no_truncation_at_limit() { + // Exactly 25_000 chars — should NOT be truncated. + let name = long_str('a', MAX_SPAN_STRING_LEN); + let mut traces = vec![vec![make_span(&name, "r", "k", "v")]]; + truncate_span_strings(&mut traces); + assert_eq!( + traces[0][0].name.as_str().chars().count(), + MAX_SPAN_STRING_LEN + ); + } + + #[test] + fn test_truncation_over_limit() { + // 25_001 chars — should be truncated to 2_500. 
+ let resource = long_str('b', MAX_SPAN_STRING_LEN + 1); + let mut traces = vec![vec![make_span("n", &resource, "k", "v")]]; + truncate_span_strings(&mut traces); + let result = traces[0][0].resource.as_str(); + assert_eq!(result.chars().count(), TRUNCATED_SPAN_STRING_LEN); + assert!(result.ends_with(TRUNCATION_SUFFIX)); + } + + #[test] + fn test_meta_key_and_value_truncated() { + let long_key = long_str('c', MAX_SPAN_STRING_LEN + 1); + let short_val = long_str('d', 2_000); // under limit — unchanged + let mut traces = vec![vec![make_span("n", "r", &long_key, &short_val)]]; + truncate_span_strings(&mut traces); + let (k, v) = traces[0][0].meta.iter().next().unwrap(); + assert_eq!(k.as_str().chars().count(), TRUNCATED_SPAN_STRING_LEN); + assert!(k.as_str().ends_with(TRUNCATION_SUFFIX)); + assert_eq!(v.as_str().chars().count(), 2_000); // unchanged + } + + #[test] + fn test_unicode_truncation_by_code_points() { + // Each '€' is 3 bytes; 25_001 euros exceed the threshold. + let s = long_str('€', MAX_SPAN_STRING_LEN + 1); + let mut traces = vec![vec![make_span(&s, "r", "k", "v")]]; + truncate_span_strings(&mut traces); + let result = traces[0][0].name.as_str(); + // Result must be exactly TRUNCATED_SPAN_STRING_LEN code points. 
+ assert_eq!(result.chars().count(), TRUNCATED_SPAN_STRING_LEN); + assert!(result.ends_with(TRUNCATION_SUFFIX)); + } + + #[test] + fn test_span_link_fields_truncated() { + let long_tracestate = long_str('x', MAX_SPAN_STRING_LEN + 1); + let long_attr_key = long_str('y', MAX_SPAN_STRING_LEN + 1); + let long_attr_val = long_str('z', MAX_SPAN_STRING_LEN + 1); + let mut traces = vec![vec![SpanBytes { + span_links: vec![SpanLink { + tracestate: long_tracestate.into(), + attributes: HashMap::from([(long_attr_key.into(), long_attr_val.into())]), + ..Default::default() + }], + ..Default::default() + }]]; + truncate_span_strings(&mut traces); + let link = &traces[0][0].span_links[0]; + assert_eq!( + link.tracestate.as_str().chars().count(), + TRUNCATED_SPAN_STRING_LEN + ); + let (k, v) = link.attributes.iter().next().unwrap(); + assert_eq!(k.as_str().chars().count(), TRUNCATED_SPAN_STRING_LEN); + assert_eq!(v.as_str().chars().count(), TRUNCATED_SPAN_STRING_LEN); + } + + #[test] + fn test_span_event_name_and_string_attribute_truncated() { + let long_name = long_str('e', MAX_SPAN_STRING_LEN + 1); + let long_str_attr = long_str('f', MAX_SPAN_STRING_LEN + 1); + let mut traces = vec![vec![SpanBytes { + span_events: vec![SpanEvent { + name: long_name.into(), + attributes: HashMap::from([ + ( + "str_attr".into(), + AttributeAnyValue::SingleValue(AttributeArrayValue::String( + long_str_attr.into(), + )), + ), + ( + "int_attr".into(), + AttributeAnyValue::SingleValue(AttributeArrayValue::Integer(42)), + ), + ]), + ..Default::default() + }], + ..Default::default() + }]]; + truncate_span_strings(&mut traces); + let event = &traces[0][0].span_events[0]; + assert_eq!( + event.name.as_str().chars().count(), + TRUNCATED_SPAN_STRING_LEN + ); + match event.attributes.get("str_attr").unwrap() { + AttributeAnyValue::SingleValue(AttributeArrayValue::String(s)) => { + assert_eq!(s.as_str().chars().count(), TRUNCATED_SPAN_STRING_LEN); + } + _ => panic!("expected string attribute"), + } + // 
Integer attribute untouched + match event.attributes.get("int_attr").unwrap() { + AttributeAnyValue::SingleValue(AttributeArrayValue::Integer(42)) => {} + _ => panic!("expected integer attribute"), + } + } + + #[test] + fn test_metric_key_truncated() { + let long_key = long_str('g', MAX_SPAN_STRING_LEN + 1); + let mut traces = vec![vec![SpanBytes { + metrics: vec![(bs(&long_key), 1.0_f64)].into(), + ..Default::default() + }]]; + truncate_span_strings(&mut traces); + let (k, v) = traces[0][0].metrics.iter().next().unwrap(); + assert_eq!(k.as_str().chars().count(), TRUNCATED_SPAN_STRING_LEN); + assert!(k.as_str().ends_with(TRUNCATION_SUFFIX)); + assert_eq!(*v, 1.0_f64); + } + + #[test] + fn test_meta_struct_key_truncated() { + use libdd_tinybytes::Bytes; + let long_key = long_str('h', MAX_SPAN_STRING_LEN + 1); + let payload = Bytes::from_static(b"some bytes"); + let mut traces = vec![vec![SpanBytes { + meta_struct: vec![(bs(&long_key), payload)].into(), + ..Default::default() + }]]; + truncate_span_strings(&mut traces); + let (k, v) = traces[0][0].meta_struct.iter().next().unwrap(); + assert_eq!(k.as_str().chars().count(), TRUNCATED_SPAN_STRING_LEN); + assert_eq!(v.as_ref(), b"some bytes"); // value unchanged + } } From 5ab305df92a06cd37f6e36d5933b5f5f3171afdd Mon Sep 17 00:00:00 2001 From: Brett Langdon Date: Wed, 24 Jun 2026 13:32:47 -0400 Subject: [PATCH 2/6] backwards compatible and rustfmt --- libdd-trace-utils/src/span/mod.rs | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/libdd-trace-utils/src/span/mod.rs b/libdd-trace-utils/src/span/mod.rs index 4859e5f44c..d1ba0fd88e 100644 --- a/libdd-trace-utils/src/span/mod.rs +++ b/libdd-trace-utils/src/span/mod.rs @@ -30,20 +30,21 @@ pub trait SpanText: Debug + Eq + Hash + Borrow + Serialize + Default { /// return `self` unchanged. /// /// Implementations that cannot allocate (e.g. `&str`) return `self` unmodified. 
- fn maybe_truncate(self, max_chars: usize, result_chars: usize, suffix: &str) -> Self; + fn maybe_truncate(self, max_chars: usize, result_chars: usize, suffix: &str) -> Self { + // Default: no allocation possible, so return unchanged. + // Implementations that own their storage (e.g. `BytesString`) should override this. + let _ = (max_chars, result_chars, suffix); + self + } } impl SpanText for &str { fn from_static_str(value: &'static str) -> Self { value } - - fn maybe_truncate(self, _max_chars: usize, _result_chars: usize, _suffix: &str) -> Self { - // &str is borrowed; allocation is impossible, so truncation is silently skipped. - // The only path that produces &str spans is the zero-copy msgpack decoder - // (SpanSlice / SliceData), whose callers must already enforce length limits upstream. - self - } + // maybe_truncate uses the default (no-op): &str is borrowed and cannot allocate. + // The only path that produces &str spans is the zero-copy msgpack decoder + // (SpanSlice / SliceData), whose callers enforce length limits upstream. } impl SpanText for BytesString { @@ -58,7 +59,8 @@ impl SpanText for BytesString { return self; } // Single pass: find the byte offset of char `keep_chars` and count total chars together, - // avoiding a separate O(n) `chars().count()` scan followed by another `char_indices()` walk. + // avoiding a separate O(n) `chars().count()` scan followed by another `char_indices()` + // walk. 
let suffix_chars = suffix.chars().count(); debug_assert!( result_chars >= suffix_chars, From ee4a455eeae5e4f1a2362ce81e7e617996af7858 Mon Sep 17 00:00:00 2001 From: Brett Langdon Date: Wed, 24 Jun 2026 14:45:33 -0400 Subject: [PATCH 3/6] ensure truncation occurs during send_async usage --- libdd-data-pipeline/src/trace_exporter/mod.rs | 69 ++++++++++++++++++- 1 file changed, 67 insertions(+), 2 deletions(-) diff --git a/libdd-data-pipeline/src/trace_exporter/mod.rs b/libdd-data-pipeline/src/trace_exporter/mod.rs index 658f068c9b..562e7972d1 100644 --- a/libdd-data-pipeline/src/trace_exporter/mod.rs +++ b/libdd-data-pipeline/src/trace_exporter/mod.rs @@ -329,9 +329,14 @@ impl msgpack_decoder::v04::from_slice(data), - DeserInputFormat::V05 => msgpack_decoder::v05::from_slice(data), + DeserInputFormat::V04 => msgpack_decoder::v04::from_bytes(owned), + DeserInputFormat::V05 => msgpack_decoder::v05::from_bytes(owned), } .map_err(|e| { error!("Error deserializing trace from request body: {e}"); @@ -2130,6 +2135,66 @@ mod tests { ); mock_otlp.assert(); } + + // ----------------------------------------------------------------------- + // Truncation regression test for the send/send_async (msgpack-decode) path + // ----------------------------------------------------------------------- + + /// Verifies that decoding via `from_bytes` (what `send_async` now uses) + /// yields owned `SpanBytes` so that `truncate_span_strings` actually fires. + /// + /// Previously `send_async` used `from_slice`, which produced `SpanSlice` + /// spans (`T::Text = &str`). Because `&str` inherits the no-op default + /// for `maybe_truncate`, the truncation call was silently skipped and + /// over-25k fields were forwarded to the agent unchanged. 
+ #[test] + fn test_send_async_path_truncates_over_long_fields() { + use libdd_trace_utils::msgpack_decoder; + use libdd_trace_utils::msgpack_encoder; + use libdd_trace_utils::span::trace_utils::{ + truncate_span_strings, MAX_SPAN_STRING_LEN, TRUNCATED_SPAN_STRING_LEN, + }; + + // Build a span with a resource one code point over the threshold. + let long_resource: String = std::iter::repeat_n('b', MAX_SPAN_STRING_LEN + 1).collect(); + let span = SpanBytes { + resource: BytesString::from_string(long_resource), + name: BytesString::from_slice(b"op").unwrap(), + service: BytesString::from_slice(b"svc").unwrap(), + span_id: 1, + trace_id: 1, + start: 1_000_000, + duration: 1_000, + ..Default::default() + }; + + // Encode to msgpack — this is the raw bytes a tracer hands to send(). + let payload = msgpack_encoder::v04::to_vec(&vec![vec![span]]); + + // --- Old (broken) path: from_slice → SpanSlice → &str → no-op truncation --- + let (mut old_traces, _) = msgpack_decoder::v04::from_slice(&payload).unwrap(); + truncate_span_strings(&mut old_traces); + assert_eq!( + old_traces[0][0].resource.chars().count(), + MAX_SPAN_STRING_LEN + 1, + "from_slice path cannot truncate (expected: still over-limit)" + ); + + // --- New (fixed) path: from_bytes → SpanBytes → BytesString → real truncation --- + let owned = libdd_tinybytes::Bytes::copy_from_slice(&payload); + let (mut new_traces, _) = msgpack_decoder::v04::from_bytes(owned).unwrap(); + truncate_span_strings(&mut new_traces); + let resource = new_traces[0][0].resource.as_str(); + assert_eq!( + resource.chars().count(), + TRUNCATED_SPAN_STRING_LEN, + "from_bytes path must truncate to {TRUNCATED_SPAN_STRING_LEN} code points" + ); + assert!( + resource.ends_with("..."), + "truncated resource must end with the suffix" + ); + } } #[cfg(test)] From 6fad88e6f45ae4851977c44b519785dd1002f897 Mon Sep 17 00:00:00 2001 From: Brett Langdon Date: Wed, 24 Jun 2026 15:02:11 -0400 Subject: [PATCH 4/6] remove useless vec --- 
libdd-data-pipeline/src/trace_exporter/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdd-data-pipeline/src/trace_exporter/mod.rs b/libdd-data-pipeline/src/trace_exporter/mod.rs index 562e7972d1..399cb524b8 100644 --- a/libdd-data-pipeline/src/trace_exporter/mod.rs +++ b/libdd-data-pipeline/src/trace_exporter/mod.rs @@ -2169,7 +2169,7 @@ mod tests { }; // Encode to msgpack — this is the raw bytes a tracer hands to send(). - let payload = msgpack_encoder::v04::to_vec(&vec![vec![span]]); + let payload = msgpack_encoder::v04::to_vec(&[vec![span]]); // --- Old (broken) path: from_slice → SpanSlice → &str → no-op truncation --- let (mut old_traces, _) = msgpack_decoder::v04::from_slice(&payload).unwrap(); From b113136a4e4647119be3634f22b72806bc62b220 Mon Sep 17 00:00:00 2001 From: Brett Langdon Date: Thu, 25 Jun 2026 13:06:23 -0400 Subject: [PATCH 5/6] explicit truncation parameter --- libdd-data-pipeline/src/trace_exporter/mod.rs | 101 +++++++++--------- 1 file changed, 48 insertions(+), 53 deletions(-) diff --git a/libdd-data-pipeline/src/trace_exporter/mod.rs b/libdd-data-pipeline/src/trace_exporter/mod.rs index 399cb524b8..64368110e7 100644 --- a/libdd-data-pipeline/src/trace_exporter/mod.rs +++ b/libdd-data-pipeline/src/trace_exporter/mod.rs @@ -329,14 +329,9 @@ impl msgpack_decoder::v04::from_bytes(owned), - DeserInputFormat::V05 => msgpack_decoder::v05::from_bytes(owned), + DeserInputFormat::V04 => msgpack_decoder::v04::from_slice(data), + DeserInputFormat::V05 => msgpack_decoder::v05::from_slice(data), } .map_err(|e| { error!("Error deserializing trace from request body: {e}"); @@ -355,7 +350,9 @@ impl>>, ) -> Result { self.check_agent_info().await; - self.send_trace_chunks_inner(trace_chunks).await + self.send_trace_chunks_inner(trace_chunks, true) + .await } /// Sends trace chunks via OTLP HTTP (JSON or protobuf) when OTLP config is enabled. 
@@ -663,12 +669,17 @@ impl( &self, mut traces: Vec>>, + truncate: bool, ) -> Result { let mut header_tags: TracerHeaderTags = self.metadata.borrow().into(); // Truncate over-long string fields before any downstream processing so that stats, - // serialization, and the OTLP path all operate on the same normalized payload. - libdd_trace_utils::span::trace_utils::truncate_span_strings(&mut traces); + // serialisation, and the OTLP path all operate on the same normalised payload. + // Skipped on the msgpack path (`send`/`send_async`) where the tracer is responsible + // for enforcing field-length limits before encoding. + if truncate { + libdd_trace_utils::span::trace_utils::truncate_span_strings(&mut traces); + } // Process stats computation and drop non-sampled (p0) chunks. // This must run before the OTLP path so that unsampled spans are not exported. @@ -2136,29 +2147,25 @@ mod tests { mock_otlp.assert(); } - // ----------------------------------------------------------------------- - // Truncation regression test for the send/send_async (msgpack-decode) path - // ----------------------------------------------------------------------- + // Documents the `truncate=false` path: spans decoded from msgpack via from_slice + // have T::Text = &str, for which truncate_span_strings is a no-op. This proves + // that send/send_async correctly leaves over-long fields unchanged. + // + // Note: there is no integration test that exercises send_trace_chunks_inner with + // truncate=true through the full send_trace_chunks_async call chain. The unit + // tests in trace_utils.rs prove that truncate_span_strings works on BytesData + // spans, and the trace_serializer round-trip tests verify that truncated data + // survives encoding, but neither goes through send_trace_chunks_inner itself. - /// Verifies that decoding via `from_bytes` (what `send_async` now uses) - /// yields owned `SpanBytes` so that `truncate_span_strings` actually fires. 
- /// - /// Previously `send_async` used `from_slice`, which produced `SpanSlice` - /// spans (`T::Text = &str`). Because `&str` inherits the no-op default - /// for `maybe_truncate`, the truncation call was silently skipped and - /// over-25k fields were forwarded to the agent unchanged. - #[test] - fn test_send_async_path_truncates_over_long_fields() { - use libdd_trace_utils::msgpack_decoder; - use libdd_trace_utils::msgpack_encoder; - use libdd_trace_utils::span::trace_utils::{ - truncate_span_strings, MAX_SPAN_STRING_LEN, TRUNCATED_SPAN_STRING_LEN, - }; + use libdd_trace_utils::span::trace_utils::MAX_SPAN_STRING_LEN; - // Build a span with a resource one code point over the threshold. - let long_resource: String = std::iter::repeat_n('b', MAX_SPAN_STRING_LEN + 1).collect(); + /// send_async decodes via from_slice (&str spans); truncate_span_strings is a + /// no-op on &str, so over-long fields pass through unchanged (tracer's responsibility). + #[test] + fn test_send_async_does_not_truncate_over_long_fields() { + let over_limit: String = std::iter::repeat_n('b', MAX_SPAN_STRING_LEN + 1).collect(); let span = SpanBytes { - resource: BytesString::from_string(long_resource), + resource: BytesString::from_string(over_limit), name: BytesString::from_slice(b"op").unwrap(), service: BytesString::from_slice(b"svc").unwrap(), span_id: 1, @@ -2167,32 +2174,20 @@ mod tests { duration: 1_000, ..Default::default() }; + let payload = libdd_trace_utils::msgpack_encoder::v04::to_vec(&[vec![span]]); - // Encode to msgpack — this is the raw bytes a tracer hands to send(). - let payload = msgpack_encoder::v04::to_vec(&[vec![span]]); + // Decode via from_slice — produces SpanSlice<'_> where T::Text = &str. 
+ let (mut traces, _) = + libdd_trace_utils::msgpack_decoder::v04::from_slice(&payload).unwrap(); - // --- Old (broken) path: from_slice → SpanSlice → &str → no-op truncation --- - let (mut old_traces, _) = msgpack_decoder::v04::from_slice(&payload).unwrap(); - truncate_span_strings(&mut old_traces); - assert_eq!( - old_traces[0][0].resource.chars().count(), - MAX_SPAN_STRING_LEN + 1, - "from_slice path cannot truncate (expected: still over-limit)" - ); + // truncate_span_strings is a no-op for &str spans regardless of the truncate + // flag; calling it here proves the no-op property directly. + libdd_trace_utils::span::trace_utils::truncate_span_strings(&mut traces); - // --- New (fixed) path: from_bytes → SpanBytes → BytesString → real truncation --- - let owned = libdd_tinybytes::Bytes::copy_from_slice(&payload); - let (mut new_traces, _) = msgpack_decoder::v04::from_bytes(owned).unwrap(); - truncate_span_strings(&mut new_traces); - let resource = new_traces[0][0].resource.as_str(); assert_eq!( - resource.chars().count(), - TRUNCATED_SPAN_STRING_LEN, - "from_bytes path must truncate to {TRUNCATED_SPAN_STRING_LEN} code points" - ); - assert!( - resource.ends_with("..."), - "truncated resource must end with the suffix" + traces[0][0].resource.chars().count(), + MAX_SPAN_STRING_LEN + 1, + "send_async must not truncate — tracer is responsible for field-length limits" ); } } From c7caf7f419271e497076daf22dfa6aacb17f12c5 Mon Sep 17 00:00:00 2001 From: Brett Langdon Date: Thu, 25 Jun 2026 13:26:07 -0400 Subject: [PATCH 6/6] linting --- libdd-data-pipeline/src/trace_exporter/mod.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/libdd-data-pipeline/src/trace_exporter/mod.rs b/libdd-data-pipeline/src/trace_exporter/mod.rs index 64368110e7..1b4c844c12 100644 --- a/libdd-data-pipeline/src/trace_exporter/mod.rs +++ b/libdd-data-pipeline/src/trace_exporter/mod.rs @@ -350,9 +350,7 @@ impl>>, ) -> Result { self.check_agent_info().await; - 
self.send_trace_chunks_inner(trace_chunks, true) - .await + self.send_trace_chunks_inner(trace_chunks, true).await } /// Sends trace chunks via OTLP HTTP (JSON or protobuf) when OTLP config is enabled.