From 76fffe0960b66e5ebbb58168a626f8e29a88dcd8 Mon Sep 17 00:00:00 2001 From: Minh Vu Date: Sat, 27 Jun 2026 23:08:23 +0200 Subject: [PATCH 1/3] json: parse quoted integers exactly --- buffa/src/json_helpers.rs | 129 ++++++++++++++++++++++++++++++-- buffa/src/json_helpers/tests.rs | 27 +++++++ 2 files changed, 150 insertions(+), 6 deletions(-) diff --git a/buffa/src/json_helpers.rs b/buffa/src/json_helpers.rs index 5103f96e..75adf734 100644 --- a/buffa/src/json_helpers.rs +++ b/buffa/src/json_helpers.rs @@ -915,14 +915,131 @@ fn parse_int_from_str>(v: &str) -> Option { if let Ok(n) = v.parse::() { return I::try_from(n).ok(); } - // Fall back to parsing as f64 for exponential/float notation (e.g. "1e5"). - // The f64 intermediate silently rounds values > 2^53, but the direct - // integer parse above handles those cases exactly. - let f: f64 = v.parse().ok()?; - if !is_exact_integer(f) { + let n = parse_exact_decimal_int(v)?; + I::try_from(n).ok() +} + +/// Parse a decimal/exponential string as an exact integer. +/// +/// Accepts the numeric string forms the JSON integer helpers intentionally +/// support beyond plain integers: zero-fraction decimals like `"1.0"` and +/// decimal scientific notation like `"1e5"` / `"1.0e2"`. Returns `None` if +/// the value is not mathematically integral or would overflow `i128`. +fn parse_exact_decimal_int(v: &str) -> Option { + let bytes = v.as_bytes(); + if bytes.is_empty() { + return None; + } + + let mut i = 0usize; + let negative = match bytes[0] { + b'+' => { + i = 1; + false + } + b'-' => { + i = 1; + true + } + _ => false, + }; + + let int_start = i; + while i < bytes.len() && bytes[i].is_ascii_digit() { + i += 1; + } + let int_end = i; + + let mut frac_start = i; + let mut frac_end = i; + if i < bytes.len() && bytes[i] == b'.' { + i += 1; + frac_start = i; + while i < bytes.len() && bytes[i].is_ascii_digit() { + i += 1; + } + frac_end = i; + } + + if int_start == int_end && frac_start == frac_end { return None; } - I::try_from(f as i128).ok() + + let mut exponent = 0i64; + if i < bytes.len() && matches!(bytes[i], b'e' | b'E') { + i += 1; + let exp_negative = match bytes.get(i) { + Some(b'+') => { + i += 1; + false + } + Some(b'-') => { + i += 1; + true + } + _ => false, + }; + let exp_start = i; + while i < bytes.len() && bytes[i].is_ascii_digit() { + exponent = exponent + .checked_mul(10)? + .checked_add(i64::from(bytes[i] - b'0'))?; + i += 1; + } + if i == exp_start { + return None; + } + if exp_negative { + exponent = exponent.checked_neg()?; + } + } + + if i != bytes.len() { + return None; + } + + // Trailing fractional zeros do not affect integrality and would only + // inflate the significand before we re-scale it. + while frac_end > frac_start && bytes[frac_end - 1] == b'0' { + frac_end -= 1; + } + + let mut significand = 0i128; + let mut nonzero_digit_seen = false; + for &digit in bytes[int_start..int_end] + .iter() + .chain(bytes[frac_start..frac_end].iter()) + { + if !nonzero_digit_seen && digit == b'0' { + continue; + } + nonzero_digit_seen = true; + significand = significand + .checked_mul(10)? + .checked_add(i128::from(digit - b'0'))?; + } + + if !nonzero_digit_seen { + return Some(0); + } + + let scale = exponent.checked_sub((frac_end - frac_start) as i64)?; + if scale >= 0 { + let pow10 = 10i128.checked_pow(u32::try_from(scale).ok()?)?; + significand = significand.checked_mul(pow10)?; + } else { + let divisor = 10i128.checked_pow(u32::try_from(scale.checked_abs()?).ok()?)?; + if significand % divisor != 0 { + return None; + } + significand /= divisor; + } + + if negative { + significand.checked_neg() + } else { + Some(significand) + } } /// Try to interpret an f64 as an exact integer. diff --git a/buffa/src/json_helpers/tests.rs b/buffa/src/json_helpers/tests.rs index 2a7e988c..6b659e39 100644 --- a/buffa/src/json_helpers/tests.rs +++ b/buffa/src/json_helpers/tests.rs @@ -246,6 +246,20 @@ fn int64_deserializes_from_quoted_string() { assert_eq!(val.0, -9007199254740993i64); } +#[test] +fn int64_deserializes_exact_float_notation_strings() { + #[rustfmt::skip] + let cases: &[(&str, i64)] = &[ + (r#""9007199254740993.0""#, 9007199254740993), + (r#""9.007199254740993e15""#, 9007199254740993), + (r#""1200e-2""#, 12), + ]; + for &(json, expected) in cases { + let val: SerdeInt64 = serde_json::from_str(json).unwrap(); + assert_eq!(val.0, expected, "input: {json}"); + } +} + // ── uint64 ────────────────────────────────────────────────────────────── #[test] @@ -260,6 +274,12 @@ fn uint64_deserializes_from_quoted_string() { assert_eq!(val.0, u64::MAX); } +#[test] +fn uint64_deserializes_exact_float_notation_string() { + let val: SerdeUint64 = serde_json::from_str(r#""18446744073709551615.0""#).unwrap(); + assert_eq!(val.0, u64::MAX); +} + // ── float ─────────────────────────────────────────────────────────────── #[test] @@ -384,6 +404,11 @@ fn int64_rejects_overflow_string() { assert!(serde_json::from_str::(r#""99999999999999999999""#).is_err()); } +#[test] +fn int64_rejects_inexact_exponential_string() { + assert!(serde_json::from_str::(r#""1e-1""#).is_err()); +} + #[test] fn uint64_rejects_negative_string() { // "-1" parses as i64 but fails u64::try_from @@ -963,11 +988,13 @@ fn int32_deserialize_table() { ("42", Some(42)), // bare number (r#""42""#, Some(42)), // quoted string (r#""1e2""#, Some(100)), // exponential string notation + (r#""1200e-2""#, Some(12)), // exact negative exponent ("null", Some(0)), // null → 0 ("1.0", Some(1)), // integer-valued f64 (visit_f64) ("-2147483648", Some(i32::MIN)), // boundary ("2147483647", Some(i32::MAX)), // boundary ("9223372036854775807", None), // i64::MAX overflow → error + (r#""1e-1""#, None), // non-integral exponential string → error (r#""1.5""#, None), // fractional string → error ("1.5", None), // fractional f64 → error ]; From 306dd37d239604f3d42bdd95408809e4e755f303 Mon Sep 17 00:00:00 2001 From: Iain McGinniss <309153+iainmcgin@users.noreply.github.com> Date: Sun, 28 Jun 2026 21:19:25 +0000 Subject: [PATCH 2/3] json: add significand-overflow, huge-exponent, and negative exact-value tests; changelog fragment --- .../unreleased/fixed-20260628-211100.yaml | 3 +++ buffa/src/json_helpers/tests.rs | 21 +++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 .changes/unreleased/fixed-20260628-211100.yaml diff --git a/.changes/unreleased/fixed-20260628-211100.yaml b/.changes/unreleased/fixed-20260628-211100.yaml new file mode 100644 index 00000000..8bbad089 --- /dev/null +++ b/.changes/unreleased/fixed-20260628-211100.yaml @@ -0,0 +1,3 @@ +kind: Fixed +body: 'JSON: quoted integer strings in decimal/exponent notation (e.g. `"9007199254740993.0"`, `"9.007199254740993e15"`) now parse exactly instead of routing through `f64`, which silently rounded magnitudes above 2^53. Non-integral and overflowing strings are still rejected. (#255)' +time: 2026-06-28T21:11:00.000000000Z diff --git a/buffa/src/json_helpers/tests.rs b/buffa/src/json_helpers/tests.rs index 6b659e39..d9fc4e33 100644 --- a/buffa/src/json_helpers/tests.rs +++ b/buffa/src/json_helpers/tests.rs @@ -260,6 +260,12 @@ fn int64_deserializes_exact_float_notation_strings() { } } +#[test] +fn int64_deserializes_exact_negative_float_notation_string() { + let val: SerdeInt64 = serde_json::from_str(r#""-9007199254740993.0""#).unwrap(); + assert_eq!(val.0, -9007199254740993i64); +} + // ── uint64 ────────────────────────────────────────────────────────────── #[test] @@ -409,6 +415,21 @@ fn int64_rejects_inexact_exponential_string() { assert!(serde_json::from_str::(r#""1e-1""#).is_err()); } +#[test] +fn int64_rejects_significand_overflow_string() { + // 40 digits overflows even the i128 significand accumulator, exercising + // the checked-arithmetic path rather than the i64 try_from narrowing. + assert!( + serde_json::from_str::(r#""9999999999999999999999999999999999999999""#) + .is_err() + ); +} + +#[test] +fn int64_rejects_huge_exponent_string() { + assert!(serde_json::from_str::(r#""1e9999999999""#).is_err()); +} + #[test] fn uint64_rejects_negative_string() { // "-1" parses as i64 but fails u64::try_from From 07b604f80afd643fb451f7c6ea6a990297faaa83 Mon Sep 17 00:00:00 2001 From: Iain McGinniss <309153+iainmcgin@users.noreply.github.com> Date: Sun, 28 Jun 2026 23:04:52 +0000 Subject: [PATCH 3/3] json: restructure exact decimal parser around str combinators Replace the manual byte-index scanner with split_once/strip_prefix/ trim_end_matches and fold digit validation into the significand accumulation loop. Same accept/reject behavior and values; roughly half the code. --- buffa/src/json_helpers.rs | 107 ++++++++------------------------------ 1 file changed, 23 insertions(+), 84 deletions(-) diff --git a/buffa/src/json_helpers.rs b/buffa/src/json_helpers.rs index 75adf734..e868e8f5 100644 --- a/buffa/src/json_helpers.rs +++ b/buffa/src/json_helpers.rs @@ -926,109 +926,48 @@ fn parse_int_from_str>(v: &str) -> Option { /// decimal scientific notation like `"1e5"` / `"1.0e2"`. Returns `None` if /// the value is not mathematically integral or would overflow `i128`. fn parse_exact_decimal_int(v: &str) -> Option { - let bytes = v.as_bytes(); - if bytes.is_empty() { - return None; - } - - let mut i = 0usize; - let negative = match bytes[0] { - b'+' => { - i = 1; - false - } - b'-' => { - i = 1; - true - } - _ => false, + let (mantissa, exp) = match v.split_once(['e', 'E']) { + // `i64::from_str` handles the exponent's sign and rejects empty, + // non-digit, and i64-overflowing exponents. + Some((m, e)) => (m, e.parse::().ok()?), + None => (v, 0), }; - - let int_start = i; - while i < bytes.len() && bytes[i].is_ascii_digit() { - i += 1; - } - let int_end = i; - - let mut frac_start = i; - let mut frac_end = i; - if i < bytes.len() && bytes[i] == b'.' { - i += 1; - frac_start = i; - while i < bytes.len() && bytes[i].is_ascii_digit() { - i += 1; - } - frac_end = i; - } - - if int_start == int_end && frac_start == frac_end { - return None; - } - - let mut exponent = 0i64; - if i < bytes.len() && matches!(bytes[i], b'e' | b'E') { - i += 1; - let exp_negative = match bytes.get(i) { - Some(b'+') => { - i += 1; - false - } - Some(b'-') => { - i += 1; - true - } - _ => false, - }; - let exp_start = i; - while i < bytes.len() && bytes[i].is_ascii_digit() { - exponent = exponent - .checked_mul(10)? - .checked_add(i64::from(bytes[i] - b'0'))?; - i += 1; - } - if i == exp_start { - return None; - } - if exp_negative { - exponent = exponent.checked_neg()?; - } - } - - if i != bytes.len() { + let (negative, mantissa) = match mantissa.strip_prefix('-') { + Some(rest) => (true, rest), + None => (false, mantissa.strip_prefix('+').unwrap_or(mantissa)), + }; + let (int_part, frac_part) = mantissa + .split_once('.') + .map_or((mantissa, ""), |(i, f)| (i, f)); + if int_part.is_empty() && frac_part.is_empty() { return None; } - // Trailing fractional zeros do not affect integrality and would only // inflate the significand before we re-scale it. - while frac_end > frac_start && bytes[frac_end - 1] == b'0' { - frac_end -= 1; - } + let frac_part = frac_part.trim_end_matches('0'); let mut significand = 0i128; - let mut nonzero_digit_seen = false; - for &digit in bytes[int_start..int_end] - .iter() - .chain(bytes[frac_start..frac_end].iter()) - { - if !nonzero_digit_seen && digit == b'0' { - continue; + for digit in int_part.bytes().chain(frac_part.bytes()) { + if !digit.is_ascii_digit() { + return None; } - nonzero_digit_seen = true; significand = significand .checked_mul(10)? .checked_add(i128::from(digit - b'0'))?; } - - if !nonzero_digit_seen { + if significand == 0 { + // Zero is integral under any exponent; short-circuit before the + // power-of-ten scaling below can overflow on e.g. "0e100". return Some(0); } - let scale = exponent.checked_sub((frac_end - frac_start) as i64)?; + // value = significand × 10^(exp − frac_len) + let scale = exp.checked_sub(frac_part.len() as i64)?; if scale >= 0 { let pow10 = 10i128.checked_pow(u32::try_from(scale).ok()?)?; significand = significand.checked_mul(pow10)?; } else { - let divisor = 10i128.checked_pow(u32::try_from(scale.checked_abs()?).ok()?)?; + let divisor = 10i128.checked_pow(u32::try_from(scale.unsigned_abs()).ok()?)?; if significand % divisor != 0 { return None; }