From b9b8342dfd49d0c07d6f8902685ef0a1f41c9cc7 Mon Sep 17 00:00:00 2001
From: Cameron G <156701171+camgrimsec@users.noreply.github.com>
Date: Sat, 27 Jun 2026 22:18:00 +0000
Subject: [PATCH] Add runtime warnings when max_rpm and max_tokens are unset

Surfaces silent denial-of-wallet risks by logging warnings at construction
time when resource controls are left unbounded:

* Crew.max_rpm=None: warn that client-side rate limiting is disabled and
  the only safeguard is the upstream provider's rate limit.
* LLM.max_tokens AND max_completion_tokens both None: warn that responses
  are uncapped and can produce very large outputs (e.g. 128k+ tokens) on
  modern models.

Both warnings are emitted via @model_validator(mode='after') hooks through
each module's own logger (logging.getLogger(__name__)) rather than the
module-level logging.warning(), which logs on the root logger and implicitly
calls logging.basicConfig() when no handler is configured. No behavior
change, no breaking changes, no new defaults, no raised errors.

Also clarifies the max_rpm field description.
---
 lib/crewai/src/crewai/crew.py | 21 +++++++++++++++++++--
 lib/crewai/src/crewai/llm.py  | 15 +++++++++++++++
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/lib/crewai/src/crewai/crew.py b/lib/crewai/src/crewai/crew.py
index cd996bae40..9efef4ebb3 100644
--- a/lib/crewai/src/crewai/crew.py
+++ b/lib/crewai/src/crewai/crew.py
@@ -6,6 +6,7 @@
 from copy import copy as shallow_copy
 from hashlib import md5
 import json
+import logging
 from pathlib import Path
 import re
 from typing import (
@@ -296,8 +297,8 @@ class Crew(FlowTrackable, BaseModel):
     max_rpm: int | None = Field(
         default=None,
         description=(
-            "Maximum number of requests per minute for the crew execution "
-            "to be respected."
+            "Maximum number of requests per minute for the crew execution. "
+            "Set to None to disable rate limiting (not recommended for production)."
         ),
     )
     prompt_file: str | None = Field(
@@ -613,6 +614,22 @@ def check_config_type(
         # TODO: Improve typing
         return json.loads(v) if isinstance(v, Json) else v  # type: ignore
 
+    @model_validator(mode="after")
+    def _warn_rate_limit_disabled(self) -> Crew:
+        """Warn operators when no client-side rate limit is configured.
+
+        With ``max_rpm=None`` the only safeguard against rapid-fire requests is
+        the upstream API provider's own rate limits, which can lead to 429s and
+        unexpected spend. Logged via the module logger so the root logger is
+        never implicitly configured; behavior is otherwise unchanged.
+        """
+        if self.max_rpm is None:
+            logging.getLogger(__name__).warning(
+                "max_rpm is None: rate limiting is disabled. "
+                "Set a positive integer in Crew() to limit API requests and avoid 429 errors."
+            )
+        return self
+
     @model_validator(mode="after")
     def set_private_attrs(self) -> Crew:
         """set private attributes."""
diff --git a/lib/crewai/src/crewai/llm.py b/lib/crewai/src/crewai/llm.py
index 153bbd2d73..ae988eacb2 100644
--- a/lib/crewai/src/crewai/llm.py
+++ b/lib/crewai/src/crewai/llm.py
@@ -683,6 +683,21 @@ def _validate_llm_fields(cls, data: Any) -> Any:
         data["is_anthropic"] = cls._is_anthropic_model(model)
         return data
 
+    @model_validator(mode="after")
+    def _warn_tokens_uncapped(self) -> LLM:
+        """Warn when neither ``max_tokens`` nor ``max_completion_tokens`` is set.
+
+        Without an explicit cap, a single prompt can produce very large outputs
+        (e.g. 128k+ tokens on modern models), leading to runaway spend. Logged via
+        the module logger so the root logger is never implicitly configured.
+        """
+        if self.max_tokens is None and self.max_completion_tokens is None:
+            logging.getLogger(__name__).warning(
+                "max_tokens/max_completion_tokens not set; LLM responses are uncapped. "
+                "Set a limit (e.g., max_tokens=4096) to control token costs and avoid runaway generation."
+            )
+        return self
+
     @model_validator(mode="after")
     def _init_litellm(self) -> LLM:
         self.is_litellm = True