diff --git a/lib/crewai/src/crewai/crew.py b/lib/crewai/src/crewai/crew.py
index cd996bae40..9efef4ebb3 100644
--- a/lib/crewai/src/crewai/crew.py
+++ b/lib/crewai/src/crewai/crew.py
@@ -6,6 +6,7 @@
 from copy import copy as shallow_copy
 from hashlib import md5
 import json
+import logging
 from pathlib import Path
 import re
 from typing import (
@@ -296,8 +297,8 @@ class Crew(FlowTrackable, BaseModel):
     max_rpm: int | None = Field(
         default=None,
         description=(
-            "Maximum number of requests per minute for the crew execution "
-            "to be respected."
+            "Maximum number of requests per minute for the crew execution. "
+            "Set to None to disable rate limiting (not recommended for production)."
         ),
     )
     prompt_file: str | None = Field(
@@ -613,6 +614,28 @@ def check_config_type(
         # TODO: Improve typing
         return json.loads(v) if isinstance(v, Json) else v  # type: ignore
 
+    @model_validator(mode="after")
+    def _warn_rate_limit_disabled(self) -> Crew:
+        """Warn operators when no client-side rate limit is configured.
+
+        With ``max_rpm=None`` the only safeguard against rapid-fire requests is
+        the upstream API provider's own rate limits, which can lead to 429s and
+        unexpected spend. This warning surfaces the risk at runtime without
+        changing behavior.
+
+        Returns:
+            The validated instance, unchanged.
+        """
+        if self.max_rpm is None:
+            # Use this module's logger: the root-level ``logging.warning`` helper
+            # implicitly calls ``logging.basicConfig()`` when the root logger has
+            # no handler, a side effect a library should not impose on its host.
+            logging.getLogger(__name__).warning(
+                "max_rpm is None: rate limiting is disabled. "
+                "Set a positive integer in Crew() to limit API requests and avoid 429 errors."
+            )
+        return self
+
     @model_validator(mode="after")
     def set_private_attrs(self) -> Crew:
         """set private attributes."""
diff --git a/lib/crewai/src/crewai/llm.py b/lib/crewai/src/crewai/llm.py
index 153bbd2d73..ae988eacb2 100644
--- a/lib/crewai/src/crewai/llm.py
+++ b/lib/crewai/src/crewai/llm.py
@@ -683,6 +683,27 @@ def _validate_llm_fields(cls, data: Any) -> Any:
         data["is_anthropic"] = cls._is_anthropic_model(model)
         return data
 
+    @model_validator(mode="after")
+    def _warn_tokens_uncapped(self) -> LLM:
+        """Warn when neither ``max_tokens`` nor ``max_completion_tokens`` is set.
+
+        Without an explicit cap, a single prompt can produce very large outputs
+        (e.g. 128k+ tokens on modern models), leading to runaway spend. This
+        warning surfaces the risk at runtime without changing behavior.
+
+        Returns:
+            The validated instance, unchanged.
+        """
+        if self.max_tokens is None and self.max_completion_tokens is None:
+            # Use this module's logger: the root-level ``logging.warning`` helper
+            # implicitly calls ``logging.basicConfig()`` when the root logger has
+            # no handler, a side effect a library should not impose on its host.
+            logging.getLogger(__name__).warning(
+                "max_tokens/max_completion_tokens not set; LLM responses are uncapped. "
+                "Set a limit (e.g., max_tokens=4096) to control token costs and avoid runaway generation."
+            )
+        return self
+
     @model_validator(mode="after")
     def _init_litellm(self) -> LLM:
         self.is_litellm = True