From b527ab069e9b85525aaf3c6a8dbeff6e699c87c9 Mon Sep 17 00:00:00 2001 From: Zhidong Peng Date: Tue, 19 May 2026 16:39:34 +0000 Subject: [PATCH 1/2] json_read_from_file to skip the BOM bytes before deserialize the content. --- proxy_agent_shared/src/misc_helpers.rs | 61 +++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 2 deletions(-) diff --git a/proxy_agent_shared/src/misc_helpers.rs b/proxy_agent_shared/src/misc_helpers.rs index 9d121c0d..38ed5e18 100644 --- a/proxy_agent_shared/src/misc_helpers.rs +++ b/proxy_agent_shared/src/misc_helpers.rs @@ -249,8 +249,18 @@ pub fn json_read_from_file<T>(file_path: &Path) -> Result<T> where T: DeserializeOwned, { - let file = File::open(file_path)?; - let obj: T = serde_json::from_reader(file)?; + // Read the whole file to bytes so we can transparently skip an optional + // UTF-8 BOM (EF BB BF). serde_json does not strip a BOM and would otherwise + // fail the parse with "expected value at line 1 column 1" for any file + // produced by editors / tools that default to BOM-prefixed UTF-8 (e.g. + // Windows PowerShell 5.1's `Set-Content -Encoding UTF8`, Notepad, VS Code's + // "UTF-8 with BOM"). + let bytes = fs::read(file_path)?; + let payload = match bytes.as_slice() { + [0xEF, 0xBB, 0xBF, rest @ ..] => rest, + rest => rest, + }; + let obj: T = serde_json::from_slice(payload)?; Ok(obj) } @@ -587,6 +597,53 @@ mod tests { _ = fs::remove_dir_all(&temp_test_path); } + #[test] + fn json_read_from_file_skips_utf8_bom_test() { + #[derive(Serialize, Deserialize, PartialEq, Debug)] + struct Small { + name: String, + value: u32, + } + + let mut temp_test_path = env::temp_dir(); + temp_test_path.push("json_read_from_file_skips_utf8_bom_test"); + _ = fs::remove_dir_all(&temp_test_path); + super::try_create_folder(&temp_test_path).unwrap(); + + let body = r#"{"name":"hello","value":42}"#; + let expected = Small { + name: "hello".to_string(), + value: 42, + }; + + // 1. BOM-less file parses (regression guard for existing behavior). 
+ let no_bom = temp_test_path.join("no_bom.json"); + fs::write(&no_bom, body.as_bytes()).unwrap(); + assert_eq!( + super::json_read_from_file::<Small>(&no_bom).unwrap(), + expected + ); + + // 2. UTF-8 BOM-prefixed file parses (the actual fix). + let with_bom = temp_test_path.join("with_bom.json"); + let mut bytes = Vec::with_capacity(3 + body.len()); + bytes.extend_from_slice(&[0xEF, 0xBB, 0xBF]); + bytes.extend_from_slice(body.as_bytes()); + fs::write(&with_bom, &bytes).unwrap(); + assert_eq!( + super::json_read_from_file::<Small>(&with_bom).unwrap(), + expected + ); + + // 3. A bare BOM (no payload) still surfaces as a parse error rather + // than being silently treated as an empty document. + let bom_only = temp_test_path.join("bom_only.json"); + fs::write(&bom_only, &[0xEF, 0xBB, 0xBF]).unwrap(); + assert!(super::json_read_from_file::<Small>(&bom_only).is_err()); + + _ = fs::remove_dir_all(&temp_test_path); + } + #[test] fn path_to_string_test() { let path = "path_to_string_test"; From 4b58830173b7184a08abb3f31fb47daff4b5c7e0 Mon Sep 17 00:00:00 2001 From: Zhidong Peng Date: Tue, 19 May 2026 18:03:04 +0000 Subject: [PATCH 2/2] fix formatting --- proxy_agent_shared/src/misc_helpers.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proxy_agent_shared/src/misc_helpers.rs b/proxy_agent_shared/src/misc_helpers.rs index 38ed5e18..151b9e55 100644 --- a/proxy_agent_shared/src/misc_helpers.rs +++ b/proxy_agent_shared/src/misc_helpers.rs @@ -254,7 +254,7 @@ where // fail the parse with "expected value at line 1 column 1" for any file // produced by editors / tools that default to BOM-prefixed UTF-8 (e.g. // Windows PowerShell 5.1's `Set-Content -Encoding UTF8`, Notepad, VS Code's - // "UTF-8 with BOM"). + // "UTF-8 with BOM"). let bytes = fs::read(file_path)?; let payload = match bytes.as_slice() { [0xEF, 0xBB, 0xBF, rest @ ..] => rest,