Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
98d2523
Parametrize chunk and message join separators
juliendenize Jun 4, 2026
7aa8189
Use TypeGuard for type-safe content narrowing
juliendenize Jun 4, 2026
0cb2e81
Fix chat template intra-message chunk joining to match normalizer
juliendenize Jun 4, 2026
d9d2e63
Restrict intra-message chunk join change to V15 only
juliendenize Jun 4, 2026
1842eb4
Address review nits: remove cur_text_len, combine branches with or
juliendenize Jun 5, 2026
16ecf70
Consolidate multimodal ContentChunk support for all message roles
juliendenize Jun 5, 2026
4f07358
Address PR review: remove assistant multimodal, simplify normalizer, …
juliendenize Jun 9, 2026
823ee67
Require content in UserMessage and SystemMessage from_openai
juliendenize Jun 9, 2026
655180c
Skip JSON normalization of tool content for V7+ normalizers
juliendenize Jun 9, 2026
ebe26d9
Use TypeGuard for assistant content, simplify tool content narrowing,…
juliendenize Jun 9, 2026
fee1021
Remove _narrow_tool_content, inline logic directly
juliendenize Jun 9, 2026
dc09e05
Fix mypy errors and enable V15 assistant multimodal in chat templates
juliendenize Jun 9, 2026
db684eb
Restore V7 tool content validation, add V15 override accepting all ch…
juliendenize Jun 9, 2026
affebf5
Regenerate V15 golden chat templates with assistant multimodal support
juliendenize Jun 9, 2026
a8b65cc
Restore _are_text_chunks guard, remove assistant image/audio from tem…
juliendenize Jun 9, 2026
d16b7ef
Apply suggestions from code review
juliendenize Jun 9, 2026
a701ae3
Use content chunk type aliases in TypeGuard and narrowing function si…
juliendenize Jun 9, 2026
a974af1
Revert encode_assistant_message to list[int], reject audio in V7-V13 …
juliendenize Jun 9, 2026
2cf685e
Revert V7 audio rejection: audio supported in system messages from V7+
juliendenize Jun 9, 2026
953873e
Add pre-V7 system message rejection tests for audio and think chunks
juliendenize Jun 9, 2026
e73d56c
Fix review nits: stale comment, misleading return, test parametrize
juliendenize Jun 9, 2026
354e244
Narrow encode_system_message return type to exclude images
juliendenize Jun 9, 2026
113acb0
Add REQUEST_TOOL_AUDIO_TRAIN fixture for tool message with audio
juliendenize Jun 9, 2026
baf29e6
Expand TestAssistantContentNarrowing to cover V7, V13, V15 with rejec…
juliendenize Jun 9, 2026
3c1228d
Refactor test_parity to use _get_conversations with to_openai conversion
juliendenize Jun 9, 2026
4ac82b7
Reorganize V15 tokenizer test fixtures, add text assertions
juliendenize Jun 9, 2026
1035be1
Use ToolContentChunk type alias in _parse_tool_content signature
juliendenize Jun 9, 2026
729050d
Use walrus operator for reasoning_effort, fix double backticks
juliendenize Jun 9, 2026
69ab729
Refactor normalizer tests into version-specific classes, add error ma…
juliendenize Jun 9, 2026
21860dc
Reject ThinkChunk in assistant messages for pre-v11 normalizers, remo…
juliendenize Jun 10, 2026
53057e2
Refactor normalizer tests to use Pydantic model equality assertions
juliendenize Jun 10, 2026
66af65b
Add back intra-message ThinkChunk aggregation test to V13 class
juliendenize Jun 10, 2026
864d127
Assert full InstructRequest output in normalizer tests
juliendenize Jun 10, 2026
75f78e9
Remove redundant empty reasoning-effort branch in test helper
juliendenize Jun 12, 2026
093f44e
Add content-chunk type validation to base request validator
juliendenize Jun 12, 2026
091e1f4
Add per-version content-chunk validation overrides
juliendenize Jun 12, 2026
b210943
Clean up content-chunk validator hooks per review
juliendenize Jun 12, 2026
43b15e6
Move content-chunk validation out of the normalizer
juliendenize Jun 12, 2026
f995ddd
Reconcile chunk-type tests across Pydantic, normalizer and validator …
juliendenize Jun 12, 2026
a14f855
Polish normalizer docstrings and tighten chunk-type tests per review
juliendenize Jun 12, 2026
0385e80
Replace normalizer casts with content-chunk TypeGuards in chunk.py
juliendenize Jun 12, 2026
c36ddbf
Reorganize chunk-type tests into per-version classes; drop normalizer…
juliendenize Jun 12, 2026
216e85d
Add informative messages to normalizer content TypeGuard asserts
juliendenize Jun 12, 2026
5bfde18
Restructure chunk-type tests into exhaustive allow/disallow methods
juliendenize Jun 12, 2026
34d57d9
Type test chunk helpers as Any to satisfy mypy on message construction
juliendenize Jun 12, 2026
46bd99f
Centralize content-chunk allow-lists on message classes
juliendenize Jun 15, 2026
b57956e
Fix mypy.
juliendenize Jun 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/mistral_common/experimental/app/routers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
)
from mistral_common.experimental.think import _split_content_and_think_chunks
from mistral_common.experimental.tools import _decode_tool_calls, _split_content_and_tool_calls
from mistral_common.protocol.instruct.chunk import TextChunk, ThinkChunk
from mistral_common.protocol.instruct.chunk import ContentChunk, TextChunk, ThinkChunk
from mistral_common.protocol.instruct.messages import AssistantMessage
from mistral_common.protocol.instruct.request import ChatCompletionRequest
from mistral_common.tokens.tokenizers.base import SpecialTokenPolicy, Tokenized, TokenizerVersion
Expand Down Expand Up @@ -100,7 +100,7 @@ async def detokenize_to_assistant_message(
else:
content_tokens, tool_calls_tokens = tokens, ()

content: str | list[TextChunk | ThinkChunk] | None = None
content: str | list[ContentChunk] | None = None

if settings.tokenizer.instruct_tokenizer.tokenizer.version >= TokenizerVersion.v13:
assert isinstance(settings.tokenizer.instruct_tokenizer, InstructTokenizerV13)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,30 @@ def validates_assistant_non_empty(self) -> bool:
r"""Whether to validate that assistant messages have non-empty content or tool calls."""
return self.version >= TokenizerVersion.v7 or (self.version >= TokenizerVersion.v3 and not self.spm)

@property
def tool_supports_multimodal(self) -> bool:
    r"""Tell whether tool messages may carry content chunks other than text.

    Returns:
        The result of comparing `self.version` against `TokenizerVersion.v15`
        with `>=`, i.e. multimodal tool content is enabled from V15 onwards.
    """
    # First tokenizer version for which tool messages accept non-text chunks.
    minimum_version = TokenizerVersion.v15
    return self.version >= minimum_version

@property
def system_supports_audio(self) -> bool:
    r"""Tell whether system messages may carry audio content.

    Returns:
        `self.audio_support` itself when it is falsy; otherwise the result of
        comparing `self.version` against `TokenizerVersion.v15` with `>=`.
    """
    audio_enabled = self.audio_support
    if not audio_enabled:
        # Mirror `and` short-circuiting: hand back the falsy operand unchanged.
        return audio_enabled
    return self.version >= TokenizerVersion.v15


def _join_types_desc(parts: list[str]) -> str:
r"""Join type names into a human-readable description string.

Args:
parts: List of type names (e.g. ["text", "thinking", "image"]).

Returns:
Formatted string like "text", "text and thinking", or "text, thinking and image".
"""
if len(parts) == 1:
return parts[0]
return ", ".join(parts[:-1]) + " and " + parts[-1]


def _generate_header(config: TemplateConfig) -> str:
r"""Generate template header with default system message.
Expand Down Expand Up @@ -872,6 +896,8 @@ def _generate_system_message_handling(config: TemplateConfig) -> str:
if has_extra_types:
if config.system_supports_thinking:
rc_args += ", supported_types_desc='text and thinking'"
elif config.system_supports_audio:
rc_args += ", supported_types_desc='text and audio'"
else:
rc_args += ", supported_types_desc='text'"
if config.any_thinking_support:
Expand All @@ -882,7 +908,7 @@ def _generate_system_message_handling(config: TemplateConfig) -> str:
if config.image_support:
rc_args += ", support_images=false"
if config.audio_support:
rc_args += ", support_audio=false"
rc_args += f", support_audio={'true' if config.system_supports_audio else 'false'}"
lines.append(" {{- render_content(" + rc_args + ") -}}")

lines.append(" {{- '" + _END_SYSTEM + "' -}}")
Expand Down Expand Up @@ -1204,10 +1230,10 @@ def _generate_assistant_message_handling(config: TemplateConfig) -> str:
"""
lines = []

comment_parts = ["text"]
if config.any_thinking_support:
chunk_types = "text and thinking"
else:
chunk_types = "text"
comment_parts.append("thinking")
chunk_types = _join_types_desc(comment_parts)

comment = f"{{#- Assistant messages supports {chunk_types} content. #}}"
lines.append("")
Expand Down Expand Up @@ -1235,10 +1261,10 @@ def _generate_assistant_message_handling(config: TemplateConfig) -> str:
has_extra_types = config.any_thinking_support or config.image_support or config.audio_support
rc_call_args = "message['content'], 'assistant message contents'"
if has_extra_types:
desc_parts = ["text"]
if config.any_thinking_support:
rc_call_args += ", supported_types_desc='text and thinking'"
else:
rc_call_args += ", supported_types_desc='text'"
desc_parts.append("thinking")
rc_call_args += f", supported_types_desc='{_join_types_desc(desc_parts)}'"
if config.any_thinking_support:
rc_call_args += ", support_thinking=true"
if config.image_support:
Expand Down Expand Up @@ -1423,7 +1449,10 @@ def _generate_tool_message_handling(config: TemplateConfig) -> str:
lines.append(" {#- Tool messages supports int, float or text content. #}")
lines.append(" {%- elif message['role'] == 'tool' and ns.index > ns.max_idx_user %}")
else:
lines.append(" {#- Tool messages only supports text content. #}")
if config.tool_supports_multimodal:
lines.append(" {#- Tool messages (multimodal). #}")
else:
lines.append(" {#- Tool messages only supports text content. #}")
lines.append(" {%- elif message['role'] == 'tool' %}")

if config.uses_spm_space_tracking:
Expand Down Expand Up @@ -1484,9 +1513,26 @@ def _generate_tool_message_handling(config: TemplateConfig) -> str:
+ "' }}" # noqa: E501
)
elif config.uses_simple_tool_results:
lines.append(
" {{- '" + _BEGIN_TOOL_RESULTS + "' + message['content']|string + '" + _END_TOOL_RESULTS + "' }}"
) # noqa: E501
if config.tool_supports_multimodal:
tool_rc_args = "message['content'], 'tool message contents'"
if config.image_support or config.audio_support:
desc_parts = ["text"]
if config.image_support:
desc_parts.append("image")
if config.audio_support:
desc_parts.append("audio")
tool_rc_args += f", supported_types_desc='{_join_types_desc(desc_parts)}'"
if config.image_support:
tool_rc_args += ", support_images=true"
if config.audio_support:
tool_rc_args += ", support_audio=true"
lines.append(" {{- '" + _BEGIN_TOOL_RESULTS + "' -}}")
lines.append(" {{- render_content(" + tool_rc_args + ") -}}")
lines.append(" {{- '" + _END_TOOL_RESULTS + "' }}")
else:
lines.append(
" {{- '" + _BEGIN_TOOL_RESULTS + "' + message['content']|string + '" + _END_TOOL_RESULTS + "' }}"
) # noqa: E501
else:
# v3 non-spm style
lines.extend(_emit_int_float_parsing(" "))
Expand Down
3 changes: 0 additions & 3 deletions src/mistral_common/protocol/instruct/chunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,9 +451,6 @@ def from_openai(cls, openai_chunk: dict[str, Any]) -> "ThinkChunk":
ContentChunk = Annotated[
TextChunk | ImageChunk | ImageURLChunk | AudioChunk | AudioURLChunk | ThinkChunk, Field(discriminator="type")
]
UserContentChunk = Annotated[
TextChunk | ImageChunk | ImageURLChunk | AudioChunk | AudioURLChunk, Field(discriminator="type")
]


def _convert_openai_content_chunks(openai_content_chunks: dict[str, Any]) -> ContentChunk:
Expand Down
Loading
Loading