Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@ def __init__(
Args:
endpoint: CU resource endpoint URL.
credential: Explicit credential. If None, falls back to
AZURE_API_KEY env var, then DefaultAzureCredential.
AZURE_CONTENT_UNDERSTANDING_KEY env var, then DefaultAzureCredential.
analyzer_id: Custom analyzer for compatible file types.
When set, the converter checks the analyzer's base modality
(via get_analyzer() at init) and routes only compatible
Expand Down Expand Up @@ -487,7 +487,7 @@ def __init__(

# Resolve credential
if credential is None:
api_key = os.environ.get("AZURE_API_KEY")
api_key = os.environ.get("AZURE_CONTENT_UNDERSTANDING_KEY")
if api_key is not None:
credential = AzureKeyCredential(api_key)
else:
Expand Down
54 changes: 54 additions & 0 deletions tests/test_invariant__cu_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import pytest
import os
import sys
from unittest.mock import patch, MagicMock

# Put the markitdown package sources at the front of the import search path so
# the `markitdown` import below resolves without an installed distribution.
# NOTE(review): the path is built relative to this test file's own directory,
# i.e. <this dir>/packages/markitdown/src -- confirm that matches the repo layout.
sys.path[:0] = [
    os.path.join(os.path.dirname(__file__), 'packages/markitdown/src'),
]

from markitdown.converters._cu_converter import CUConverter

@pytest.mark.parametrize("api_key_payload", [
    # Realistic, secret-looking key supplied through the environment
    "sk-live-1234567890abcdef1234567890abcdef",
    # Boundary case: empty key (nothing to leak; substring checks are skipped)
    "",
    # Benign placeholder value
    "placeholder_key",
    # Key made of path-traversal characters
    "../../../etc/passwd",
    # Key made of quote / SQL-injection style characters
    "' OR '1'='1' --",
])
def test_api_keys_not_exposed_in_output_or_errors(api_key_payload):
    """Invariant: Converter must never expose API keys in output, logs, or error messages.

    The key is injected through the environment, a converter is built without
    an explicit credential, and the key must then be absent from ``str()``,
    ``repr()`` and the text of any exception raised by ``convert(None)``.

    Args:
        api_key_payload: Value placed in the API-key environment variables.
    """

    def assert_not_leaked(text, description):
        # "" is a substring of every string, so a bare `not in` check would
        # fail for the empty-key case no matter what the converter does.
        if api_key_payload:
            assert api_key_payload not in text, (
                f"API key leaked in {description}: {text}"
            )

    # The converter resolves its key from AZURE_CONTENT_UNDERSTANDING_KEY; the
    # legacy AZURE_API_KEY name is set as well so the payload reaches the
    # converter whichever variable a given revision reads.
    key_env = {
        'AZURE_CONTENT_UNDERSTANDING_KEY': api_key_payload,
        'AZURE_API_KEY': api_key_payload,
    }

    with patch.dict(os.environ, key_env):
        # Initialize converter with mocked dependencies to prevent actual API calls.
        # NOTE(review): confirm this patch target is the client class the
        # converter actually instantiates.
        with patch('azure.ai.documentintelligence.DocumentIntelligenceClient') as mock_client:
            mock_client.return_value = MagicMock()

            converter = CUConverter(
                endpoint="https://fake-endpoint.cognitiveservices.azure.com/",
                analyzer_id=None,
                file_types=None,
            )

            # The key must not be reachable through a public attribute.
            assert not hasattr(converter, 'api_key'), "Converter should not store API key as public attribute"

            # Neither textual representation may contain the key.
            assert_not_leaked(str(converter), "string representation")
            assert_not_leaked(repr(converter), "repr")

            # Force an error with invalid input. Any exception type is
            # acceptable here; only its message is inspected for the key.
            try:
                converter.convert(None)
            except Exception as e:
                assert_not_leaked(str(e), "error message")