From 415173f9b18d149a3f8723b7d0055e226fa38234 Mon Sep 17 00:00:00 2001 From: Lucas Ma <7184042+pony-maggie@users.noreply.github.com> Date: Mon, 15 Jun 2026 08:30:20 +0800 Subject: [PATCH] fix: truncate uppercase data image URIs --- .../src/markitdown/converters/_markdownify.py | 2 +- packages/markitdown/tests/test_module_misc.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/packages/markitdown/src/markitdown/converters/_markdownify.py b/packages/markitdown/src/markitdown/converters/_markdownify.py index 19e8a2984..a07614b0f 100644 --- a/packages/markitdown/src/markitdown/converters/_markdownify.py +++ b/packages/markitdown/src/markitdown/converters/_markdownify.py @@ -104,7 +104,7 @@ def convert_img( return alt # Remove dataURIs - if src.startswith("data:") and not self.options["keep_data_uris"]: + if src.lower().startswith("data:") and not self.options["keep_data_uris"]: src = src.split(",")[0] + "..." return "![%s](%s%s)" % (alt, src, title_part) diff --git a/packages/markitdown/tests/test_module_misc.py b/packages/markitdown/tests/test_module_misc.py index 4d62e4919..2568ac106 100644 --- a/packages/markitdown/tests/test_module_misc.py +++ b/packages/markitdown/tests/test_module_misc.py @@ -220,6 +220,21 @@ def test_data_uris() -> None: assert data == b"Hello, World!" +def test_uppercase_data_image_uri_is_truncated_by_default() -> None: + markitdown = MarkItDown() + html = b'