From 67f160f592a8fab774792368478bcc5f37821f7f Mon Sep 17 00:00:00 2001 From: HumphreySun98 Date: Mon, 15 Jun 2026 17:25:16 -0500 Subject: [PATCH] fix(llm): strip cache_breakpoint marker on the litellm provider path The prompt-cache breakpoint marker is injected provider-agnostically by the agent executors, but only the native Anthropic adapter consumes it. Native providers route through BaseLLM._format_messages, which already strips the marker. The default litellm-based LLM does not: _format_messages_for_provider returned messages untouched for non-Anthropic providers, so the raw `cache_breakpoint` key reached the wire and was rejected (e.g. Groq: "property 'cache_breakpoint' is unsupported"). Strip the marker in _format_messages_for_provider, copying rather than mutating so the executor's reused message buffer keeps its markers across tool-loop iterations. Fixes #5886 Co-Authored-By: Claude Opus 4.8 (1M context) --- lib/crewai/src/crewai/llm.py | 15 ++++++++++ lib/crewai/tests/llms/test_prompt_cache.py | 34 ++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/lib/crewai/src/crewai/llm.py b/lib/crewai/src/crewai/llm.py index 153bbd2d73..3d6f108e1a 100644 --- a/lib/crewai/src/crewai/llm.py +++ b/lib/crewai/src/crewai/llm.py @@ -2280,11 +2280,26 @@ def _format_messages_for_provider( if messages is None: raise TypeError("Messages cannot be None") + from crewai.llms.cache import CACHE_BREAKPOINT_KEY + from crewai.utilities.types import LLMMessage as _LLMMessage + + # Strip the provider-agnostic cache-breakpoint marker. Only the native + # Anthropic adapter consumes it; on the litellm path every provider + # (Groq, OpenAI-compatible, etc.) receives the raw message dict and + # rejects the unknown ``cache_breakpoint`` key, so it must never reach + # the wire. Copy rather than mutate, since the executor reuses this + # message buffer across iterations of the tool-use loop. + cleaned: list[LLMMessage] = [] for msg in messages: if not isinstance(msg, dict) or "role" not in msg or "content" not in msg: raise TypeError( "Invalid message format. Each message must be a dict with 'role' and 'content' keys" ) + copy: dict[str, Any] = { + k: v for k, v in msg.items() if k != CACHE_BREAKPOINT_KEY + } + cleaned.append(cast(_LLMMessage, copy)) + messages = cleaned if "o1" in self.model.lower(): formatted_messages = [] diff --git a/lib/crewai/tests/llms/test_prompt_cache.py b/lib/crewai/tests/llms/test_prompt_cache.py index c17dd35704..055e712281 100644 --- a/lib/crewai/tests/llms/test_prompt_cache.py +++ b/lib/crewai/tests/llms/test_prompt_cache.py @@ -2,6 +2,7 @@ from __future__ import annotations +from crewai.llm import LLM from crewai.llms.cache import ( CACHE_BREAKPOINT_KEY, mark_cache_breakpoint, @@ -189,3 +190,36 @@ def test_openai_format_strips_marker_from_wire_payload(self) -> None: formatted = llm._format_messages(messages) for m in formatted: assert CACHE_BREAKPOINT_KEY not in m + + +class TestLiteLLMStripsMarker: + """Providers routed through the litellm ``LLM`` class (Groq, generic + OpenAI-compatible endpoints, etc.) do not go through ``BaseLLM._format_messages``. + Their messages are shaped by ``LLM._format_messages_for_provider``, which must + also strip the marker — otherwise the raw ``cache_breakpoint`` key reaches the + provider API and is rejected (e.g. Groq: "property 'cache_breakpoint' is + unsupported"). Regression test for #5886. + """ + + def test_groq_format_strips_marker_from_wire_payload(self) -> None: + llm = LLM(model="groq/llama-3.3-70b-versatile") + messages = [ + mark_cache_breakpoint({"role": "system", "content": "stable"}), + mark_cache_breakpoint({"role": "user", "content": "hi"}), + ] + formatted = llm._format_messages_for_provider(messages) + for m in formatted: + assert CACHE_BREAKPOINT_KEY not in m + + def test_litellm_format_does_not_mutate_caller_buffer(self) -> None: + """The executor reuses one messages buffer across tool-loop iterations, + so stripping must copy rather than mutate the caller's dicts. + """ + llm = LLM(model="groq/llama-3.3-70b-versatile") + messages = [ + mark_cache_breakpoint({"role": "system", "content": "stable"}), + mark_cache_breakpoint({"role": "user", "content": "hi"}), + ] + llm._format_messages_for_provider(messages) + assert messages[0][CACHE_BREAKPOINT_KEY] is True + assert messages[1][CACHE_BREAKPOINT_KEY] is True