From 67f160f592a8fab774792368478bcc5f37821f7f Mon Sep 17 00:00:00 2001
From: HumphreySun98 <humphreysun98@gmail.com>
Date: Mon, 15 Jun 2026 17:25:16 -0500
Subject: [PATCH] fix(llm): strip cache_breakpoint marker on the litellm
 provider path

The prompt-cache breakpoint marker is injected provider-agnostically by the
agent executors, but only the native Anthropic adapter consumes it. Native
providers route through BaseLLM._format_messages, which already strips the
marker. The default litellm-based LLM did not:
LLM._format_messages_for_provider returned messages untouched for
non-Anthropic providers, so the raw `cache_breakpoint` key reached the wire
and was rejected (e.g. Groq: "property 'cache_breakpoint' is unsupported").

Strip the marker in _format_messages_for_provider, copying rather than
mutating so the executor's reused message buffer keeps its markers across
tool-loop iterations.

Fixes #5886

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 lib/crewai/src/crewai/llm.py               | 15 ++++++++++
 lib/crewai/tests/llms/test_prompt_cache.py | 34 ++++++++++++++++++++++
 2 files changed, 49 insertions(+)

diff --git a/lib/crewai/src/crewai/llm.py b/lib/crewai/src/crewai/llm.py
index 153bbd2d73..3d6f108e1a 100644
--- a/lib/crewai/src/crewai/llm.py
+++ b/lib/crewai/src/crewai/llm.py
@@ -2280,11 +2280,26 @@ def _format_messages_for_provider(
         if messages is None:
             raise TypeError("Messages cannot be None")
 
+        from crewai.llms.cache import CACHE_BREAKPOINT_KEY
+        from crewai.utilities.types import LLMMessage as _LLMMessage
+
+        # Strip the provider-agnostic cache-breakpoint marker. Only the native
+        # Anthropic adapter consumes it; on the litellm path every provider
+        # (Groq, OpenAI-compatible, etc.) receives the raw message dict and
+        # rejects the unknown ``cache_breakpoint`` key, so it must never reach
+        # the wire. Copy rather than mutate, since the executor reuses this
+        # message buffer across iterations of the tool-use loop.
+        cleaned: list[LLMMessage] = []
         for msg in messages:
             if not isinstance(msg, dict) or "role" not in msg or "content" not in msg:
                 raise TypeError(
                     "Invalid message format. Each message must be a dict with 'role' and 'content' keys"
                 )
+            copy: dict[str, Any] = {
+                k: v for k, v in msg.items() if k != CACHE_BREAKPOINT_KEY
+            }
+            cleaned.append(cast(_LLMMessage, copy))
+        messages = cleaned
 
         if "o1" in self.model.lower():
             formatted_messages = []
diff --git a/lib/crewai/tests/llms/test_prompt_cache.py b/lib/crewai/tests/llms/test_prompt_cache.py
index c17dd35704..055e712281 100644
--- a/lib/crewai/tests/llms/test_prompt_cache.py
+++ b/lib/crewai/tests/llms/test_prompt_cache.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+from crewai.llm import LLM
 from crewai.llms.cache import (
     CACHE_BREAKPOINT_KEY,
     mark_cache_breakpoint,
@@ -189,3 +190,36 @@ def test_openai_format_strips_marker_from_wire_payload(self) -> None:
         formatted = llm._format_messages(messages)
         for m in formatted:
             assert CACHE_BREAKPOINT_KEY not in m
+
+
+class TestLiteLLMStripsMarker:
+    """Providers routed through the litellm ``LLM`` class (Groq, generic
+    OpenAI-compatible endpoints, etc.) do not go through ``BaseLLM._format_messages``.
+    Their messages are shaped by ``LLM._format_messages_for_provider``, which must
+    also strip the marker — otherwise the raw ``cache_breakpoint`` key reaches the
+    provider API and is rejected (e.g. Groq: "property 'cache_breakpoint' is
+    unsupported"). Regression test for #5886.
+    """
+
+    def test_groq_format_strips_marker_from_wire_payload(self) -> None:
+        llm = LLM(model="groq/llama-3.3-70b-versatile")
+        messages = [
+            mark_cache_breakpoint({"role": "system", "content": "stable"}),
+            mark_cache_breakpoint({"role": "user", "content": "hi"}),
+        ]
+        formatted = llm._format_messages_for_provider(messages)
+        for m in formatted:
+            assert CACHE_BREAKPOINT_KEY not in m
+
+    def test_litellm_format_does_not_mutate_caller_buffer(self) -> None:
+        """The executor reuses one messages buffer across tool-loop iterations,
+        so stripping must copy rather than mutate the caller's dicts.
+        """
+        llm = LLM(model="groq/llama-3.3-70b-versatile")
+        messages = [
+            mark_cache_breakpoint({"role": "system", "content": "stable"}),
+            mark_cache_breakpoint({"role": "user", "content": "hi"}),
+        ]
+        llm._format_messages_for_provider(messages)
+        assert messages[0][CACHE_BREAKPOINT_KEY] is True
+        assert messages[1][CACHE_BREAKPOINT_KEY] is True