From fade8360946b57101ef7cc30f2edf51f277feb48 Mon Sep 17 00:00:00 2001
From: JSap0914
Date: Tue, 16 Jun 2026 19:57:26 +0900
Subject: [PATCH] fix: ZipConverter emits '(unknown)' instead of 'None' when stream has no source info

When MarkItDown.convert_stream() is called with a ZIP stream that has no
associated URL, local_path, or filename (e.g. a raw io.BytesIO), the
ZipConverter header read:

    Content from the zip file `None`:

because stream_info.url, stream_info.local_path, and stream_info.filename
were all None and Python f-strings render None as the literal string 'None'.

Fix: fall back to '(unknown)' when all three source-info fields are absent,
producing the more descriptive:

    Content from the zip file `(unknown)`:

Add a regression test in test_module_misc.py that verifies the output does
not contain the literal string 'None' in this scenario.
---
 .../markitdown/converters/_zip_converter.py   |  7 +++++-
 packages/markitdown/tests/test_module_misc.py | 22 +++++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/packages/markitdown/src/markitdown/converters/_zip_converter.py b/packages/markitdown/src/markitdown/converters/_zip_converter.py
index f87e6c890..3d388a781 100644
--- a/packages/markitdown/src/markitdown/converters/_zip_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_zip_converter.py
@@ -90,7 +90,12 @@ def convert(
         stream_info: StreamInfo,
         **kwargs: Any,  # Options to pass to the converter
     ) -> DocumentConverterResult:
-        file_path = stream_info.url or stream_info.local_path or stream_info.filename
+        file_path = (
+            stream_info.url
+            or stream_info.local_path
+            or stream_info.filename
+            or "(unknown)"
+        )
         md_content = f"Content from the zip file `{file_path}`:\n\n"
 
         with zipfile.ZipFile(file_stream, "r") as zipObj:
diff --git a/packages/markitdown/tests/test_module_misc.py b/packages/markitdown/tests/test_module_misc.py
index 4d62e4919..5c99d08a8 100644
--- a/packages/markitdown/tests/test_module_misc.py
+++ b/packages/markitdown/tests/test_module_misc.py
@@ -3,6 +3,7 @@
 import os
 import re
 import shutil
+import zipfile
 import pytest
 from unittest.mock import MagicMock
 
@@ -532,6 +533,27 @@ def test_markitdown_llm() -> None:
     validate_strings(result, PPTX_TEST_STRINGS)
 
 
+def test_zip_stream_no_filename_header() -> None:
+    """Regression test: ZipConverter must not render the literal string 'None'
+    in the output header when the stream has no associated URL, local path, or
+    filename (e.g. when called via convert_stream() without stream_info)."""
+    markitdown = MarkItDown()
+
+    buf = io.BytesIO()
+    with zipfile.ZipFile(buf, "w") as zf:
+        zf.writestr("hello.txt", "Hello world")
+    buf.seek(0)
+
+    result = markitdown.convert_stream(
+        buf, stream_info=StreamInfo(mimetype="application/zip")
+    )
+    assert (
+        "None" not in result.markdown
+    ), f"Header must not contain literal 'None'; got: {result.markdown[:120]!r}"
+    assert "Content from the zip file" in result.markdown
+    assert "Hello world" in result.markdown
+
+
 if __name__ == "__main__":
     """Runs this file's tests from the command line."""
     for test in [