From b9b8342dfd49d0c07d6f8902685ef0a1f41c9cc7 Mon Sep 17 00:00:00 2001
From: Cameron G <156701171+camgrimsec@users.noreply.github.com>
Date: Sat, 27 Jun 2026 22:18:00 +0000
Subject: [PATCH] Add runtime warnings when max_rpm and max_tokens are unset

Surfaces silent denial-of-wallet risks by logging warnings at construction
time when resource controls are left unbounded:

* Crew.max_rpm=None: warn that client-side rate limiting is disabled and
  the only safeguard is the upstream provider's rate limit.
* LLM.max_tokens AND max_completion_tokens both None: warn that responses
  are uncapped and can produce very large outputs (e.g. 128k+ tokens) on
  modern models.

Both warnings are emitted via @model_validator(mode='after') hooks using
logging.warning, consistent with the existing logging patterns. No
behavior change, no breaking changes, no new defaults, no raised errors.
Also clarifies the max_rpm field description.
---
 lib/crewai/src/crewai/crew.py | 21 +++++++++++++++++++--
 lib/crewai/src/crewai/llm.py  | 15 +++++++++++++++
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/lib/crewai/src/crewai/crew.py b/lib/crewai/src/crewai/crew.py
index cd996bae40..9efef4ebb3 100644
--- a/lib/crewai/src/crewai/crew.py
+++ b/lib/crewai/src/crewai/crew.py
@@ -6,6 +6,7 @@
 from copy import copy as shallow_copy
 from hashlib import md5
 import json
+import logging
 from pathlib import Path
 import re
 from typing import (
@@ -296,8 +297,8 @@ class Crew(FlowTrackable, BaseModel):
     max_rpm: int | None = Field(
         default=None,
         description=(
-            "Maximum number of requests per minute for the crew execution "
-            "to be respected."
+            "Maximum number of requests per minute for the crew execution. "
+            "Set to None to disable rate limiting (not recommended for production)."
         ),
     )
     prompt_file: str | None = Field(
@@ -613,6 +614,22 @@ def check_config_type(
         # TODO: Improve typing
         return json.loads(v) if isinstance(v, Json) else v  # type: ignore
 
+    @model_validator(mode="after")
+    def _warn_rate_limit_disabled(self) -> Crew:
+        """Warn operators when no client-side rate limit is configured.
+
+        With ``max_rpm=None`` the only safeguard against rapid-fire requests is
+        the upstream provider's own rate limits (risking 429s and unexpected
+        spend). Logs through a module-named logger because ``logging.warning``
+        calls ``basicConfig`` on a handler-less root logger. Returns ``self``.
+        """
+        if self.max_rpm is None:
+            logging.getLogger(__name__).warning(
+                "max_rpm is None: rate limiting is disabled. "
+                "Set a positive integer in Crew() to limit API requests and avoid 429 errors."
+            )
+        return self
+
     @model_validator(mode="after")
     def set_private_attrs(self) -> Crew:
         """set private attributes."""
diff --git a/lib/crewai/src/crewai/llm.py b/lib/crewai/src/crewai/llm.py
index 153bbd2d73..ae988eacb2 100644
--- a/lib/crewai/src/crewai/llm.py
+++ b/lib/crewai/src/crewai/llm.py
@@ -683,6 +683,21 @@ def _validate_llm_fields(cls, data: Any) -> Any:
         data["is_anthropic"] = cls._is_anthropic_model(model)
         return data
 
+    @model_validator(mode="after")
+    def _warn_tokens_uncapped(self) -> LLM:
+        """Warn when neither ``max_tokens`` nor ``max_completion_tokens`` is set.
+
+        Without an explicit cap a single prompt can produce very large outputs.
+        Logs through a module-named logger because ``logging.warning`` calls
+        ``basicConfig`` on a handler-less root logger. Returns ``self`` unchanged.
+        """
+        if self.max_tokens is None and self.max_completion_tokens is None:
+            logging.getLogger(__name__).warning(
+                "max_tokens/max_completion_tokens not set; LLM responses are uncapped. "
+                "Set a limit (e.g., max_tokens=4096) to control token costs and avoid runaway generation."
+            )
+        return self
+
     @model_validator(mode="after")
     def _init_litellm(self) -> LLM:
         self.is_litellm = True