Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion lib/crewai-files/src/crewai_files/formatting/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,8 @@ def _format_block(
return formatter.format_block(file_input, resolved)
if isinstance(formatter, OpenAIResponsesFormatter):
return formatter.format_block(resolved, file_input.content_type)
if isinstance(formatter, (OpenAIFormatter, GeminiFormatter)):
if isinstance(formatter, OpenAIFormatter):
return formatter.format_block(resolved, file_input.content_type)
if isinstance(formatter, GeminiFormatter):
return formatter.format_block(resolved)
raise TypeError(f"Unknown formatter type: {type(formatter).__name__}")
323 changes: 171 additions & 152 deletions lib/crewai-files/src/crewai_files/formatting/openai.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
"""OpenAI content block formatter."""

from __future__ import annotations

import base64
from typing import Any

"""OpenAI content block formatter."""
from __future__ import annotations
import base64
from typing import Any
from crewai_files.core.resolved import (
FileReference,
InlineBase64,
Expand All @@ -14,151 +14,170 @@
)


class OpenAIResponsesFormatter:
"""Formats resolved files into OpenAI Responses API content blocks.

The Responses API uses a different format than Chat Completions:
- Text uses `type: "input_text"` instead of `type: "text"`
- Images use `type: "input_image"` with `file_id` or `image_url`
- PDFs use `type: "input_file"` with `file_id`, `file_url`, or `file_data`
"""

@staticmethod
def format_text_content(text: str) -> dict[str, Any]:
"""Format text as an OpenAI Responses API content block.

Args:
text: The text content to format.
def _normalize_media_type(content_type: str) -> str:
"""Normalize MIME types before provider capability checks."""
return content_type.split(";", 1)[0].strip().lower()

Returns:
A content block with type "input_text".
"""
return {"type": "input_text", "text": text}

@staticmethod
class OpenAIResponsesFormatter:
    """Formats resolved files into OpenAI Responses API content blocks.

    The Responses API uses a different format than Chat Completions:
    - Text uses `type: "input_text"` instead of `type: "text"`
    - Images use `type: "input_image"` with `file_id` or `image_url`
    - PDFs use `type: "input_file"` with `file_id`, `file_url`, or `file_data`
    """

    @staticmethod
    def format_text_content(text: str) -> dict[str, Any]:
        """Format text as an OpenAI Responses API content block.

        Args:
            text: The text content to format.

        Returns:
            A content block with type "input_text".
        """
        return {"type": "input_text", "text": text}

    @staticmethod
    def _classify_content_type(content_type: str) -> tuple[bool, bool]:
        """Classify a MIME type as image and/or PDF.

        The type is normalized first (parameters after ``;`` dropped,
        whitespace trimmed, lowercased) so that spellings such as
        ``"Application/PDF"`` or ``"application/pdf; charset=binary"`` are
        recognized instead of being rejected as unsupported.

        Args:
            content_type: Raw MIME type of the file.

        Returns:
            A ``(is_image, is_pdf)`` tuple.
        """
        media_type = content_type.split(";", 1)[0].strip().lower()
        return media_type.startswith("image/"), media_type == "application/pdf"

    @staticmethod
    def format_block(resolved: ResolvedFileType, content_type: str) -> dict[str, Any]:
        """Format a resolved file into an OpenAI Responses API content block.

        Args:
            resolved: Resolved file.
            content_type: MIME type of the file. Matching is case-insensitive
                and ignores any parameters after ``;``.

        Returns:
            Content block dict.

        Raises:
            TypeError: If the resolved type is not supported, or if the
                content type is neither an image nor a PDF.
        """
        is_image, is_pdf = OpenAIResponsesFormatter._classify_content_type(
            content_type
        )
        # The error message reports the caller's original string, not the
        # normalized form, so it can be matched against the input.
        unsupported = f"Unsupported content type for Responses API: {content_type}"

        if isinstance(resolved, FileReference):
            if is_image:
                return {"type": "input_image", "file_id": resolved.file_id}
            if is_pdf:
                return {"type": "input_file", "file_id": resolved.file_id}
            raise TypeError(unsupported)

        if isinstance(resolved, UrlReference):
            if is_image:
                return {"type": "input_image", "image_url": resolved.url}
            if is_pdf:
                return {"type": "input_file", "file_url": resolved.url}
            raise TypeError(unsupported)

        # Both inline variants end up as a base64 data URL; only the way the
        # base64 text is obtained differs.
        if isinstance(resolved, InlineBase64):
            encoded = resolved.data
        elif isinstance(resolved, InlineBytes):
            encoded = base64.b64encode(resolved.data).decode("ascii")
        else:
            raise TypeError(f"Unexpected resolved type: {type(resolved).__name__}")

        data_url = f"data:{resolved.content_type};base64,{encoded}"
        if is_image:
            return {"type": "input_image", "image_url": data_url}
        if is_pdf:
            return {
                "type": "input_file",
                "filename": "document.pdf",
                "file_data": data_url,
            }
        raise TypeError(unsupported)


class OpenAIFormatter:
    """Formats resolved files into OpenAI content blocks."""

    @staticmethod
    def format_block(resolved: ResolvedFileType, content_type: str = "") -> dict[str, Any]:
        """Format a resolved file into an OpenAI content block.

        Args:
            resolved: Resolved file.
            content_type: MIME type of the file content. Optional so that
                existing single-argument callers keep working; when given,
                it is matched case-insensitively and without parameters.

        Returns:
            Content block dict.

        Raises:
            TypeError: If resolved type is not supported, or if the content
                type is PDF, which this formatter does not emit a block for.
        """
        # Normalize before comparing so "Application/PDF" and
        # "application/pdf; charset=binary" are both recognized.
        media_type = content_type.split(";", 1)[0].strip().lower()
        if media_type == "application/pdf":
            # Every non-file_id branch below produces an ``image_url`` block,
            # so a PDF must be rejected instead of being wrapped as an image.
            raise TypeError(
                "OpenAI Chat Completions API does not support PDF attachments. "
                "Use the Responses API (api='responses') or a different provider "
                "that supports PDF content."
            )

        if isinstance(resolved, FileReference):
            return {
                "type": "file",
                "file": {"file_id": resolved.file_id},
            }

        if isinstance(resolved, UrlReference):
            return {
                "type": "image_url",
                "image_url": {"url": resolved.url},
            }

        if isinstance(resolved, InlineBase64):
            return {
                "type": "image_url",
                "image_url": {
                    "url": f"data:{resolved.content_type};base64,{resolved.data}"
                },
            }

        if isinstance(resolved, InlineBytes):
            data = base64.b64encode(resolved.data).decode("ascii")
            return {
                "type": "image_url",
                "image_url": {"url": f"data:{resolved.content_type};base64,{data}"},
            }

        raise TypeError(f"Unexpected resolved type: {type(resolved).__name__}")
"""Format a resolved file into an OpenAI Responses API content block.

Args:
resolved: Resolved file.
content_type: MIME type of the file.

Returns:
Content block dict.

Raises:
TypeError: If resolved type is not supported.
"""
media_type = _normalize_media_type(content_type)
is_image = media_type.startswith("image/")
is_pdf = media_type == "application/pdf"

if isinstance(resolved, FileReference):
if is_image:
return {
"type": "input_image",
"file_id": resolved.file_id,
}
if is_pdf:
return {
"type": "input_file",
"file_id": resolved.file_id,
}
raise TypeError(
f"Unsupported content type for Responses API: {content_type}"
)

if isinstance(resolved, UrlReference):
if is_image:
return {
"type": "input_image",
"image_url": resolved.url,
}
if is_pdf:
return {
"type": "input_file",
"file_url": resolved.url,
}
raise TypeError(
f"Unsupported content type for Responses API: {content_type}"
)

if isinstance(resolved, InlineBase64):
if is_image:
return {
"type": "input_image",
"image_url": f"data:{resolved.content_type};base64,{resolved.data}",
}
if is_pdf:
return {
"type": "input_file",
"filename": "document.pdf",
"file_data": f"data:{resolved.content_type};base64,{resolved.data}",
}
raise TypeError(
f"Unsupported content type for Responses API: {content_type}"
)

if isinstance(resolved, InlineBytes):
data = base64.b64encode(resolved.data).decode("ascii")
if is_image:
return {
"type": "input_image",
"image_url": f"data:{resolved.content_type};base64,{data}",
}
if is_pdf:
return {
"type": "input_file",
"filename": "document.pdf",
"file_data": f"data:{resolved.content_type};base64,{data}",
}
raise TypeError(
f"Unsupported content type for Responses API: {content_type}"
)

raise TypeError(f"Unexpected resolved type: {type(resolved).__name__}")


class OpenAIFormatter:
    """Builds OpenAI content blocks from resolved files."""

    @staticmethod
    def format_block(resolved: ResolvedFileType, content_type: str = "") -> dict[str, Any]:
        """Turn a resolved file into a single OpenAI content block.

        Args:
            resolved: Resolved file.
            content_type: MIME type of the file content; normalized before
                the PDF check.

        Returns:
            A ``file`` block for file references, otherwise an ``image_url``
            block carrying either the remote URL or a base64 data URL.

        Raises:
            TypeError: If the content type is PDF, or if the resolved type is
                not one of the supported kinds.
        """
        # PDFs are refused up front, before the resolved type is inspected.
        if _normalize_media_type(content_type) == "application/pdf":
            raise TypeError(
                "OpenAI Chat Completions API does not support PDF attachments. "
                "Use the Responses API (api='responses') or a different provider "
                "that supports PDF content."
            )

        if isinstance(resolved, FileReference):
            return {"type": "file", "file": {"file_id": resolved.file_id}}

        # The remaining kinds all share the ``image_url`` block shape and
        # differ only in how the URL string is produced.
        if isinstance(resolved, UrlReference):
            target_url = resolved.url
        elif isinstance(resolved, InlineBase64):
            target_url = f"data:{resolved.content_type};base64,{resolved.data}"
        elif isinstance(resolved, InlineBytes):
            encoded = base64.b64encode(resolved.data).decode("ascii")
            target_url = f"data:{resolved.content_type};base64,{encoded}"
        else:
            raise TypeError(f"Unexpected resolved type: {type(resolved).__name__}")

        return {"type": "image_url", "image_url": {"url": target_url}}
31 changes: 31 additions & 0 deletions lib/crewai-files/tests/test_openai_formatting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""Tests for OpenAI file formatting helpers."""

from crewai_files.core.resolved import InlineBase64
from crewai_files.formatting.openai import OpenAIFormatter, OpenAIResponsesFormatter
import pytest


# PDF MIME type spellings used to parametrize the tests below: the canonical
# form, one with a trailing parameter, and one with mixed case.
PDF_CONTENT_TYPES = [
    "application/pdf",
    "application/pdf; charset=binary",
    "Application/PDF",
]


@pytest.mark.parametrize("content_type", PDF_CONTENT_TYPES)
def test_openai_formatter_rejects_pdf_variants(content_type: str):
    """OpenAIFormatter must raise TypeError for every PDF MIME spelling."""
    pdf_payload = InlineBase64(content_type="application/pdf", data="ZmFrZS1wZGY=")

    with pytest.raises(TypeError, match="does not support PDF attachments"):
        OpenAIFormatter.format_block(pdf_payload, content_type)


@pytest.mark.parametrize("content_type", PDF_CONTENT_TYPES)
def test_openai_responses_formatter_accepts_pdf_variants(content_type: str):
    """Every PDF MIME spelling must yield an inline ``input_file`` block."""
    pdf_payload = InlineBase64(content_type="application/pdf", data="ZmFrZS1wZGY=")

    result = OpenAIResponsesFormatter.format_block(pdf_payload, content_type)

    expected_prefix = "data:application/pdf;base64,"
    assert result["type"] == "input_file"
    assert result["filename"] == "document.pdf"
    assert result["file_data"].startswith(expected_prefix)