From 38c6937bbb8c3242c4bac0e3085735dab6da314e Mon Sep 17 00:00:00 2001 From: Greyson LaLonde Date: Tue, 7 Apr 2026 22:41:36 +0800 Subject: [PATCH 1/9] fix: add SSRF and path traversal protections CVE-2026-2286: validate_url blocks non-http/https schemes, private IPs, loopback, link-local, reserved addresses. Applied to 11 web tools. CVE-2026-2285: validate_path confines file access to the working directory. Applied to 7 file and directory tools. --- .../src/crewai_tools/security/__init__.py | 0 .../src/crewai_tools/security/safe_path.py | 35 +++++++++++++ .../src/crewai_tools/security/safe_url.py | 50 +++++++++++++++++++ .../brightdata_tool/brightdata_unlocker.py | 3 ++ .../contextual_create_agent_tool.py | 3 ++ .../contextual_parse_tool.py | 3 ++ .../directory_read_tool.py | 3 ++ .../tools/file_read_tool/file_read_tool.py | 3 ++ .../files_compressor_tool.py | 3 ++ .../firecrawl_crawl_website_tool.py | 3 ++ .../firecrawl_scrape_website_tool.py | 3 ++ .../hyperbrowser_load_tool.py | 3 ++ .../jina_scrape_website_tool.py | 3 ++ .../crewai_tools/tools/ocr_tool/ocr_tool.py | 3 ++ .../scrape_element_from_website.py | 3 ++ .../scrape_website_tool.py | 3 ++ .../scrapfly_scrape_website_tool.py | 3 ++ .../serper_scrape_website_tool.py | 4 ++ .../serply_webpage_to_markdown_tool.py | 2 + .../tools/vision_tool/vision_tool.py | 3 ++ .../website_search/website_search_tool.py | 2 + 21 files changed, 138 insertions(+) create mode 100644 lib/crewai-tools/src/crewai_tools/security/__init__.py create mode 100644 lib/crewai-tools/src/crewai_tools/security/safe_path.py create mode 100644 lib/crewai-tools/src/crewai_tools/security/safe_url.py diff --git a/lib/crewai-tools/src/crewai_tools/security/__init__.py b/lib/crewai-tools/src/crewai_tools/security/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lib/crewai-tools/src/crewai_tools/security/safe_path.py b/lib/crewai-tools/src/crewai_tools/security/safe_path.py new file mode 100644 index 0000000000..9b608fc06b --- /dev/null +++ b/lib/crewai-tools/src/crewai_tools/security/safe_path.py @@ -0,0 +1,35 @@ +"""Path validation to prevent arbitrary file access.""" + +from __future__ import annotations + +from pathlib import Path + + +def validate_path(path: str, base_directory: str | None = None) -> Path: + """Validate that a file path is confined to a safe directory. + + Args: + path: The path to validate. + base_directory: The directory to confine access to. + Defaults to the current working directory. + + Returns: + The resolved, validated Path. + + Raises: + ValueError: If the path escapes the base directory. + """ + base = Path(base_directory).resolve() if base_directory else Path.cwd().resolve() + resolved = ( + (base / path).resolve() + if not Path(path).is_absolute() + else Path(path).resolve() + ) + + if not resolved.is_relative_to(base): + raise ValueError( + f"Path {path!r} resolves to {resolved} which is outside " + f"the allowed directory {base}." + ) + + return resolved diff --git a/lib/crewai-tools/src/crewai_tools/security/safe_url.py b/lib/crewai-tools/src/crewai_tools/security/safe_url.py new file mode 100644 index 0000000000..d5b5e9a8c3 --- /dev/null +++ b/lib/crewai-tools/src/crewai_tools/security/safe_url.py @@ -0,0 +1,50 @@ +"""URL validation to prevent SSRF attacks.""" + +from __future__ import annotations + +import ipaddress +import socket +from urllib.parse import urlparse + + +def validate_url(url: str) -> str: + """Validate that a URL is safe for outbound requests. + + Args: + url: The URL to validate. + + Returns: + The validated URL. + + Raises: + ValueError: If the URL uses a blocked scheme, resolves to a + private/loopback/link-local IP, or is otherwise invalid. + """ + parsed = urlparse(url) + + if parsed.scheme not in ("http", "https"): + raise ValueError( + f"URL scheme {parsed.scheme!r} is not allowed. " + "Only http and https are permitted." + ) + + hostname = parsed.hostname + if not hostname: + raise ValueError(f"Invalid URL: no hostname found in {url!r}") + + try: + resolved = socket.getaddrinfo( + hostname, None, socket.AF_UNSPEC, socket.SOCK_STREAM + ) + except socket.gaierror as err: + raise ValueError(f"Cannot resolve hostname: {hostname!r}") from err + + for _family, _, _, _, sockaddr in resolved: + ip = ipaddress.ip_address(sockaddr[0]) + if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved: + raise ValueError( + f"URL {url!r} resolves to a private/reserved address " + f"({ip}). Requests to internal networks are blocked." + ) + + return url diff --git a/lib/crewai-tools/src/crewai_tools/tools/brightdata_tool/brightdata_unlocker.py b/lib/crewai-tools/src/crewai_tools/tools/brightdata_tool/brightdata_unlocker.py index ee1716d0b5..d014c7f88e 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/brightdata_tool/brightdata_unlocker.py +++ b/lib/crewai-tools/src/crewai_tools/tools/brightdata_tool/brightdata_unlocker.py @@ -7,6 +7,8 @@ from pydantic import BaseModel, Field import requests +from crewai_tools.security.safe_url import validate_url + class BrightDataConfig(BaseModel): API_URL: str = "https://api.brightdata.com/request" @@ -134,6 +136,7 @@ def _run( "Content-Type": "application/json", } + url = validate_url(url) try: response = requests.post( self.base_url, json=payload, headers=headers, timeout=30 diff --git a/lib/crewai-tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py b/lib/crewai-tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py index 8896e82610..b2339e51e5 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py @@ -3,6 +3,8 @@ from crewai.tools import BaseTool from pydantic import BaseModel, Field +from crewai_tools.security.safe_path import validate_path + class ContextualAICreateAgentSchema(BaseModel): """Schema for contextual create agent tool.""" @@ -57,6 +59,7 @@ def _run( # Upload documents document_ids = [] for doc_path in document_paths: + validate_path(doc_path) if not os.path.exists(doc_path): raise FileNotFoundError(f"Document not found: {doc_path}") diff --git a/lib/crewai-tools/src/crewai_tools/tools/contextualai_parse_tool/contextual_parse_tool.py b/lib/crewai-tools/src/crewai_tools/tools/contextualai_parse_tool/contextual_parse_tool.py index 1a0317172e..06e9e3174a 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/contextualai_parse_tool/contextual_parse_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/contextualai_parse_tool/contextual_parse_tool.py @@ -1,6 +1,8 @@ from crewai.tools import BaseTool from pydantic import BaseModel, Field +from crewai_tools.security.safe_path import validate_path + class ContextualAIParseSchema(BaseModel): """Schema for contextual parse tool.""" @@ -52,6 +54,7 @@ def _run( import requests + validate_path(file_path) if not os.path.exists(file_path): raise FileNotFoundError(f"Document not found: {file_path}") diff --git a/lib/crewai-tools/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py b/lib/crewai-tools/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py index f65b1b82dc..0fb437693d 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py @@ -4,6 +4,8 @@ from crewai.tools import BaseTool from pydantic import BaseModel, Field +from crewai_tools.security.safe_path import validate_path + class FixedDirectoryReadToolSchema(BaseModel): """Input for DirectoryReadTool.""" @@ -39,6 +41,7 @@ def _run( if directory is None: raise ValueError("Directory must be provided.") + validate_path(directory) if directory[-1] == "/": directory = directory[:-1] files_list = [ diff --git a/lib/crewai-tools/src/crewai_tools/tools/file_read_tool/file_read_tool.py b/lib/crewai-tools/src/crewai_tools/tools/file_read_tool/file_read_tool.py index 2c56a70cd6..527f10d696 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/file_read_tool/file_read_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/file_read_tool/file_read_tool.py @@ -3,6 +3,8 @@ from crewai.tools import BaseTool from pydantic import BaseModel, Field +from crewai_tools.security.safe_path import validate_path + class FileReadToolSchema(BaseModel): """Input for FileReadTool.""" @@ -77,6 +79,7 @@ def _run( return "Error: No file path provided. Please provide a file path either in the constructor or as an argument." try: + validate_path(file_path) with open(file_path, "r") as file: if start_line == 1 and line_count is None: return file.read() diff --git a/lib/crewai-tools/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool.py b/lib/crewai-tools/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool.py index 15861d9872..a7e79c5178 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool.py @@ -5,6 +5,8 @@ from crewai.tools import BaseTool from pydantic import BaseModel, Field +from crewai_tools.security.safe_path import validate_path + class FileCompressorToolInput(BaseModel): """Input schema for FileCompressorTool.""" @@ -40,6 +42,7 @@ def _run( overwrite: bool = False, format: str = "zip", ) -> str: + validate_path(input_path) if not os.path.exists(input_path): return f"Input path '{input_path}' does not exist." diff --git a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py index cce84c5220..3a43bf1204 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py @@ -5,6 +5,8 @@ from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, ConfigDict, Field, PrivateAttr +from crewai_tools.security.safe_url import validate_url + try: from firecrawl import FirecrawlApp # type: ignore[import-untyped] @@ -106,6 +108,7 @@ def _run(self, url: str) -> Any: if not self._firecrawl: raise RuntimeError("FirecrawlApp not properly initialized") + url = validate_url(url) return self._firecrawl.crawl(url=url, poll_interval=2, **self.config) diff --git a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py index 684cc96178..9ae0a49780 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py @@ -5,6 +5,8 @@ from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, ConfigDict, Field, PrivateAttr +from crewai_tools.security.safe_url import validate_url + try: from firecrawl import FirecrawlApp # type: ignore[import-untyped] @@ -106,6 +108,7 @@ def _run(self, url: str) -> Any: if not self._firecrawl: raise RuntimeError("FirecrawlApp not properly initialized") + url = validate_url(url) return self._firecrawl.scrape(url=url, **self.config) diff --git a/lib/crewai-tools/src/crewai_tools/tools/hyperbrowser_load_tool/hyperbrowser_load_tool.py b/lib/crewai-tools/src/crewai_tools/tools/hyperbrowser_load_tool/hyperbrowser_load_tool.py index 4cf52adab5..92446b3b1a 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/hyperbrowser_load_tool/hyperbrowser_load_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/hyperbrowser_load_tool/hyperbrowser_load_tool.py @@ -4,6 +4,8 @@ from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, Field +from crewai_tools.security.safe_url import validate_url + class HyperbrowserLoadToolSchema(BaseModel): url: str = Field(description="Website URL") @@ -119,6 +121,7 @@ def _run( ) from e params = self._prepare_params(params) + url = validate_url(url) if operation == "scrape": scrape_params = StartScrapeJobParams(url=url, **params) diff --git a/lib/crewai-tools/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py index 229df0f8ce..5319e05b3c 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py @@ -4,6 +4,8 @@ from pydantic import BaseModel, Field import requests +from crewai_tools.security.safe_url import validate_url + class JinaScrapeWebsiteToolInput(BaseModel): """Input schema for JinaScrapeWebsiteTool.""" @@ -45,6 +47,7 @@ def _run(self, website_url: str | None = None) -> str: "Website URL must be provided either during initialization or execution" ) + url = validate_url(url) response = requests.get( f"https://r.jina.ai/{url}", headers=self.headers, timeout=15 ) diff --git a/lib/crewai-tools/src/crewai_tools/tools/ocr_tool/ocr_tool.py b/lib/crewai-tools/src/crewai_tools/tools/ocr_tool/ocr_tool.py index 89ae45fb65..5bbcf63339 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/ocr_tool/ocr_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/ocr_tool/ocr_tool.py @@ -11,6 +11,8 @@ from crewai.utilities.types import LLMMessage from pydantic import BaseModel, Field +from crewai_tools.security.safe_path import validate_path + class OCRToolSchema(BaseModel): """Input schema for Optical Character Recognition Tool. @@ -98,5 +100,6 @@ def _encode_image(image_path: str) -> str: Returns: str: Base64-encoded image data as a UTF-8 string. """ + validate_path(image_path) with open(image_path, "rb") as image_file: return base64.b64encode(image_file.read()).decode() diff --git a/lib/crewai-tools/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py b/lib/crewai-tools/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py index fc7b69a7c6..ce18d33dc2 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py +++ b/lib/crewai-tools/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py @@ -5,6 +5,8 @@ from pydantic import BaseModel, Field import requests +from crewai_tools.security.safe_url import validate_url + try: from bs4 import BeautifulSoup @@ -81,6 +83,7 @@ def _run( if website_url is None or css_element is None: raise ValueError("Both website_url and css_element must be provided.") + website_url = validate_url(website_url) page = requests.get( website_url, headers=self.headers, diff --git a/lib/crewai-tools/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py index 375fcb6b4e..f0d2eece5f 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py @@ -5,6 +5,8 @@ from pydantic import Field import requests +from crewai_tools.security.safe_url import validate_url + try: from bs4 import BeautifulSoup @@ -73,6 +75,7 @@ def _run( if website_url is None: raise ValueError("Website URL must be provided.") + website_url = validate_url(website_url) page = requests.get( website_url, timeout=15, diff --git a/lib/crewai-tools/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py index 3c96d31afd..c702de084c 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py @@ -5,6 +5,8 @@ from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, Field +from crewai_tools.security.safe_url import validate_url + logger = logging.getLogger(__file__) @@ -72,6 +74,7 @@ def _run( ) -> str | None: from scrapfly import ScrapeConfig + url = validate_url(url) scrape_config = scrape_config if scrape_config is not None else {} try: response = self.scrapfly.scrape( # type: ignore[union-attr] diff --git a/lib/crewai-tools/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py index e0e4080b4f..c579e5f383 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py @@ -5,6 +5,8 @@ from pydantic import BaseModel, Field import requests +from crewai_tools.security.safe_url import validate_url + class SerperScrapeWebsiteInput(BaseModel): """Input schema for SerperScrapeWebsite.""" @@ -55,6 +57,8 @@ def _run(self, url: str, include_markdown: bool = True) -> str: # Set headers headers = {"X-API-KEY": api_key or "", "Content-Type": "application/json"} + url = validate_url(url) + # Make the API request response = requests.post( api_url, diff --git a/lib/crewai-tools/src/crewai_tools/tools/serply_api_tool/serply_webpage_to_markdown_tool.py b/lib/crewai-tools/src/crewai_tools/tools/serply_api_tool/serply_webpage_to_markdown_tool.py index f3a4729f20..8838315926 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/serply_api_tool/serply_webpage_to_markdown_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/serply_api_tool/serply_webpage_to_markdown_tool.py @@ -5,6 +5,7 @@ from pydantic import BaseModel, Field import requests +from crewai_tools.security.safe_url import validate_url from crewai_tools.tools.rag.rag_tool import RagTool @@ -48,6 +49,7 @@ def _run( # type: ignore[override] if self.proxy_location and not self.headers.get("X-Proxy-Location"): self.headers["X-Proxy-Location"] = self.proxy_location + url = validate_url(url) data = {"url": url, "method": "GET", "response_type": "markdown"} response = requests.request( "POST", diff --git a/lib/crewai-tools/src/crewai_tools/tools/vision_tool/vision_tool.py b/lib/crewai-tools/src/crewai_tools/tools/vision_tool/vision_tool.py index 1fa75c6883..d2dca0fca2 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/vision_tool/vision_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/vision_tool/vision_tool.py @@ -7,6 +7,8 @@ from crewai.utilities.types import LLMMessage from pydantic import BaseModel, Field, PrivateAttr, field_validator +from crewai_tools.security.safe_path import validate_path + class ImagePromptSchema(BaseModel): """Input for Vision Tool.""" @@ -135,5 +137,6 @@ def _encode_image(image_path: str) -> str: Returns: Base64-encoded image data """ + validate_path(image_path) with open(image_path, "rb") as image_file: return base64.b64encode(image_file.read()).decode() diff --git a/lib/crewai-tools/src/crewai_tools/tools/website_search/website_search_tool.py b/lib/crewai-tools/src/crewai_tools/tools/website_search/website_search_tool.py index 323557779e..a91b4a93e5 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/website_search/website_search_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/website_search/website_search_tool.py @@ -3,6 +3,7 @@ from pydantic import BaseModel, Field from crewai_tools.rag.data_types import DataType +from crewai_tools.security.safe_url import validate_url from crewai_tools.tools.rag.rag_tool import RagTool @@ -37,6 +38,7 @@ def __init__(self, website: str | None = None, **kwargs: Any) -> None: self._generate_description() def add(self, website: str) -> None: # type: ignore[override] + website = validate_url(website) super().add(website, data_type=DataType.WEBSITE) def _run( # type: ignore[override] From eb7ef5a99c96d05502bf95653a7da68b204c7ded Mon Sep 17 00:00:00 2001 From: Greyson LaLonde Date: Tue, 7 Apr 2026 22:46:43 +0800 Subject: [PATCH 2/9] fix: drop unused assignment from validate_url call --- .../crewai_tools/tools/brightdata_tool/brightdata_unlocker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/crewai-tools/src/crewai_tools/tools/brightdata_tool/brightdata_unlocker.py b/lib/crewai-tools/src/crewai_tools/tools/brightdata_tool/brightdata_unlocker.py index d014c7f88e..131e9b621e 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/brightdata_tool/brightdata_unlocker.py +++ b/lib/crewai-tools/src/crewai_tools/tools/brightdata_tool/brightdata_unlocker.py @@ -136,7 +136,7 @@ def _run( "Content-Type": "application/json", } - url = validate_url(url) + validate_url(url) try: response = requests.post( self.base_url, json=payload, headers=headers, timeout=30 From 0cc1b6e9bfa40d9576f556490c37794c00a0d17c Mon Sep 17 00:00:00 2001 From: Greyson LaLonde Date: Tue, 7 Apr 2026 23:07:57 +0800 Subject: [PATCH 3/9] fix: DNS rebinding protection and allow_private flag Rewrite validated URLs to use the resolved IP, preventing DNS rebinding between validation and request time. SDK-based tools use pin_ip=False since they manage their own HTTP clients. Add allow_private flag for deployments that need internal network access. --- .../src/crewai_tools/security/safe_url.py | 45 ++++++++++++++++--- .../brightdata_tool/brightdata_unlocker.py | 2 +- .../firecrawl_crawl_website_tool.py | 2 +- .../firecrawl_scrape_website_tool.py | 2 +- .../hyperbrowser_load_tool.py | 2 +- .../jina_scrape_website_tool.py | 2 +- .../scrapfly_scrape_website_tool.py | 2 +- .../serper_scrape_website_tool.py | 2 +- .../serply_webpage_to_markdown_tool.py | 2 +- .../website_search/website_search_tool.py | 2 +- 10 files changed, 48 insertions(+), 15 deletions(-) diff --git a/lib/crewai-tools/src/crewai_tools/security/safe_url.py b/lib/crewai-tools/src/crewai_tools/security/safe_url.py index d5b5e9a8c3..8b692b98b1 100644 --- a/lib/crewai-tools/src/crewai_tools/security/safe_url.py +++ b/lib/crewai-tools/src/crewai_tools/security/safe_url.py @@ -1,17 +1,33 @@ -"""URL validation to prevent SSRF attacks.""" +"""URL validation to prevent SSRF attacks. + +Returns a rewritten URL that connects to the resolved IP directly, +preventing DNS rebinding between validation and request time. +""" from __future__ import annotations import ipaddress import socket -from urllib.parse import urlparse +from urllib.parse import urlparse, urlunparse -def validate_url(url: str) -> str: +def validate_url( + url: str, + *, + allow_private: bool = False, + pin_ip: bool = True, +) -> str: """Validate that a URL is safe for outbound requests. + Resolves the hostname and optionally rewrites the URL to use the + resolved IP, preventing DNS rebinding attacks. + Args: url: The URL to validate. + allow_private: If True, skip the private/reserved IP check. + pin_ip: If True, rewrite the URL to connect to the resolved IP. + Set to False for tools that delegate to third-party SDKs + where IP-based URLs would break TLS. Returns: The validated URL. @@ -39,12 +55,29 @@ def validate_url(url: str) -> str: except socket.gaierror as err: raise ValueError(f"Cannot resolve hostname: {hostname!r}") from err + if not resolved: + raise ValueError(f"No addresses found for hostname: {hostname!r}") + + safe_ip: str | None = None for _family, _, _, _, sockaddr in resolved: ip = ipaddress.ip_address(sockaddr[0]) - if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved: + if not allow_private and ( + ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved + ): raise ValueError( f"URL {url!r} resolves to a private/reserved address " - f"({ip}). Requests to internal networks are blocked." + f"({ip}). Requests to internal networks are blocked. " + "Pass allow_private=True to override." ) + if safe_ip is None: + safe_ip = str(sockaddr[0]) + + if not pin_ip: + return url + + ip_obj = ipaddress.ip_address(safe_ip) # type: ignore[arg-type] + ip_host = f"[{safe_ip}]" if ip_obj.version == 6 else safe_ip + port_suffix = f":{parsed.port}" if parsed.port else "" + pinned_netloc = f"{ip_host}{port_suffix}" - return url + return urlunparse(parsed._replace(netloc=pinned_netloc)) diff --git a/lib/crewai-tools/src/crewai_tools/tools/brightdata_tool/brightdata_unlocker.py b/lib/crewai-tools/src/crewai_tools/tools/brightdata_tool/brightdata_unlocker.py index 131e9b621e..bfe5866b3d 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/brightdata_tool/brightdata_unlocker.py +++ b/lib/crewai-tools/src/crewai_tools/tools/brightdata_tool/brightdata_unlocker.py @@ -136,7 +136,7 @@ def _run( "Content-Type": "application/json", } - validate_url(url) + validate_url(url, pin_ip=False) try: response = requests.post( self.base_url, json=payload, headers=headers, timeout=30 diff --git a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py index 3a43bf1204..5fab6d7400 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py @@ -108,7 +108,7 @@ def _run(self, url: str) -> Any: if not self._firecrawl: raise RuntimeError("FirecrawlApp not properly initialized") - url = validate_url(url) + url = validate_url(url, pin_ip=False) return self._firecrawl.crawl(url=url, poll_interval=2, **self.config) diff --git a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py index 9ae0a49780..8513406f9f 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py @@ -108,7 +108,7 @@ def _run(self, url: str) -> Any: if not self._firecrawl: raise RuntimeError("FirecrawlApp not properly initialized") - url = validate_url(url) + url = validate_url(url, pin_ip=False) return self._firecrawl.scrape(url=url, **self.config) diff --git a/lib/crewai-tools/src/crewai_tools/tools/hyperbrowser_load_tool/hyperbrowser_load_tool.py b/lib/crewai-tools/src/crewai_tools/tools/hyperbrowser_load_tool/hyperbrowser_load_tool.py index 92446b3b1a..e53e75e822 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/hyperbrowser_load_tool/hyperbrowser_load_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/hyperbrowser_load_tool/hyperbrowser_load_tool.py @@ -121,7 +121,7 @@ def _run( ) from e params = self._prepare_params(params) - url = validate_url(url) + url = validate_url(url, pin_ip=False) if operation == "scrape": scrape_params = StartScrapeJobParams(url=url, **params) diff --git a/lib/crewai-tools/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py index 5319e05b3c..8841938076 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py @@ -47,7 +47,7 @@ def _run(self, website_url: str | None = None) -> str: "Website URL must be provided either during initialization or execution" ) - url = validate_url(url) + url = validate_url(url, pin_ip=False) response = requests.get( f"https://r.jina.ai/{url}", headers=self.headers, timeout=15 ) diff --git a/lib/crewai-tools/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py index c702de084c..d3720d106d 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py @@ -74,7 +74,7 @@ def _run( ) -> str | None: from scrapfly import ScrapeConfig - url = validate_url(url) + url = validate_url(url, pin_ip=False) scrape_config = scrape_config if scrape_config is not None else {} try: response = self.scrapfly.scrape( # type: ignore[union-attr] diff --git a/lib/crewai-tools/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py index c579e5f383..b278b86855 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py @@ -57,7 +57,7 @@ def _run(self, url: str, include_markdown: bool = True) -> str: # Set headers headers = {"X-API-KEY": api_key or "", "Content-Type": "application/json"} - url = validate_url(url) + url = validate_url(url, pin_ip=False) # Make the API request response = requests.post( diff --git a/lib/crewai-tools/src/crewai_tools/tools/serply_api_tool/serply_webpage_to_markdown_tool.py b/lib/crewai-tools/src/crewai_tools/tools/serply_api_tool/serply_webpage_to_markdown_tool.py index 8838315926..09b980f0af 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/serply_api_tool/serply_webpage_to_markdown_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/serply_api_tool/serply_webpage_to_markdown_tool.py @@ -49,7 +49,7 @@ def _run( # type: ignore[override] if self.proxy_location and not self.headers.get("X-Proxy-Location"): self.headers["X-Proxy-Location"] = self.proxy_location - url = validate_url(url) + validate_url(url, pin_ip=False) data = {"url": url, "method": "GET", "response_type": "markdown"} response = requests.request( "POST", diff --git a/lib/crewai-tools/src/crewai_tools/tools/website_search/website_search_tool.py b/lib/crewai-tools/src/crewai_tools/tools/website_search/website_search_tool.py index a91b4a93e5..6feb02d93a 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/website_search/website_search_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/website_search/website_search_tool.py @@ -38,7 +38,7 @@ def __init__(self, website: str | None = None, **kwargs: Any) -> None: self._generate_description() def add(self, website: str) -> None: # type: ignore[override] - website = validate_url(website) + website = validate_url(website, pin_ip=False) super().add(website, data_type=DataType.WEBSITE) def _run( # type: ignore[override] From 92bf205ae315639cb92d692a7b60524397210b65 Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 7 Apr 2026 09:54:44 -0700 Subject: [PATCH 4/9] fix: unify security utilities and restore RAG chokepoint validation Co-Authored-By: Claude Sonnet 4.6 --- .../src/crewai_tools/security/__init__.py | 0 .../src/crewai_tools/security/safe_path.py | 35 -------- .../src/crewai_tools/security/safe_url.py | 83 ------------------- .../brightdata_tool/brightdata_unlocker.py | 4 +- .../contextual_create_agent_tool.py | 4 +- .../contextual_parse_tool.py | 4 +- .../directory_read_tool.py | 4 +- .../tools/file_read_tool/file_read_tool.py | 4 +- .../files_compressor_tool.py | 4 +- .../firecrawl_crawl_website_tool.py | 4 +- .../firecrawl_scrape_website_tool.py | 4 +- .../hyperbrowser_load_tool.py | 4 +- .../jina_scrape_website_tool.py | 4 +- .../crewai_tools/tools/ocr_tool/ocr_tool.py | 4 +- .../scrape_element_from_website.py | 2 +- .../scrape_website_tool.py | 2 +- .../scrapfly_scrape_website_tool.py | 4 +- .../serper_scrape_website_tool.py | 4 +- .../serply_webpage_to_markdown_tool.py | 4 +- .../tools/vision_tool/vision_tool.py | 4 +- .../website_search/website_search_tool.py | 4 +- 21 files changed, 34 insertions(+), 152 deletions(-) delete mode 100644 lib/crewai-tools/src/crewai_tools/security/__init__.py delete mode 100644 lib/crewai-tools/src/crewai_tools/security/safe_path.py delete mode 100644 lib/crewai-tools/src/crewai_tools/security/safe_url.py diff --git a/lib/crewai-tools/src/crewai_tools/security/__init__.py b/lib/crewai-tools/src/crewai_tools/security/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/lib/crewai-tools/src/crewai_tools/security/safe_path.py b/lib/crewai-tools/src/crewai_tools/security/safe_path.py deleted file mode 100644 index 9b608fc06b..0000000000 --- a/lib/crewai-tools/src/crewai_tools/security/safe_path.py +++ /dev/null @@ -1,35 +0,0 @@ -"""Path validation to prevent arbitrary file access.""" - -from __future__ import annotations - -from pathlib import Path - - -def validate_path(path: str, base_directory: str | None = None) -> Path: - """Validate that a file path is confined to a safe directory. - - Args: - path: The path to validate. - base_directory: The directory to confine access to. - Defaults to the current working directory. - - Returns: - The resolved, validated Path. - - Raises: - ValueError: If the path escapes the base directory. - """ - base = Path(base_directory).resolve() if base_directory else Path.cwd().resolve() - resolved = ( - (base / path).resolve() - if not Path(path).is_absolute() - else Path(path).resolve() - ) - - if not resolved.is_relative_to(base): - raise ValueError( - f"Path {path!r} resolves to {resolved} which is outside " - f"the allowed directory {base}." - ) - - return resolved diff --git a/lib/crewai-tools/src/crewai_tools/security/safe_url.py b/lib/crewai-tools/src/crewai_tools/security/safe_url.py deleted file mode 100644 index 8b692b98b1..0000000000 --- a/lib/crewai-tools/src/crewai_tools/security/safe_url.py +++ /dev/null @@ -1,83 +0,0 @@ -"""URL validation to prevent SSRF attacks. - -Returns a rewritten URL that connects to the resolved IP directly, -preventing DNS rebinding between validation and request time. -""" - -from __future__ import annotations - -import ipaddress -import socket -from urllib.parse import urlparse, urlunparse - - -def validate_url( - url: str, - *, - allow_private: bool = False, - pin_ip: bool = True, -) -> str: - """Validate that a URL is safe for outbound requests. - - Resolves the hostname and optionally rewrites the URL to use the - resolved IP, preventing DNS rebinding attacks. - - Args: - url: The URL to validate. - allow_private: If True, skip the private/reserved IP check. - pin_ip: If True, rewrite the URL to connect to the resolved IP. - Set to False for tools that delegate to third-party SDKs - where IP-based URLs would break TLS. - - Returns: - The validated URL. - - Raises: - ValueError: If the URL uses a blocked scheme, resolves to a - private/loopback/link-local IP, or is otherwise invalid. - """ - parsed = urlparse(url) - - if parsed.scheme not in ("http", "https"): - raise ValueError( - f"URL scheme {parsed.scheme!r} is not allowed. " - "Only http and https are permitted." - ) - - hostname = parsed.hostname - if not hostname: - raise ValueError(f"Invalid URL: no hostname found in {url!r}") - - try: - resolved = socket.getaddrinfo( - hostname, None, socket.AF_UNSPEC, socket.SOCK_STREAM - ) - except socket.gaierror as err: - raise ValueError(f"Cannot resolve hostname: {hostname!r}") from err - - if not resolved: - raise ValueError(f"No addresses found for hostname: {hostname!r}") - - safe_ip: str | None = None - for _family, _, _, _, sockaddr in resolved: - ip = ipaddress.ip_address(sockaddr[0]) - if not allow_private and ( - ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved - ): - raise ValueError( - f"URL {url!r} resolves to a private/reserved address " - f"({ip}). Requests to internal networks are blocked. " - "Pass allow_private=True to override." - ) - if safe_ip is None: - safe_ip = str(sockaddr[0]) - - if not pin_ip: - return url - - ip_obj = ipaddress.ip_address(safe_ip) # type: ignore[arg-type] - ip_host = f"[{safe_ip}]" if ip_obj.version == 6 else safe_ip - port_suffix = f":{parsed.port}" if parsed.port else "" - pinned_netloc = f"{ip_host}{port_suffix}" - - return urlunparse(parsed._replace(netloc=pinned_netloc)) diff --git a/lib/crewai-tools/src/crewai_tools/tools/brightdata_tool/brightdata_unlocker.py b/lib/crewai-tools/src/crewai_tools/tools/brightdata_tool/brightdata_unlocker.py index bfe5866b3d..9be3fb5676 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/brightdata_tool/brightdata_unlocker.py +++ b/lib/crewai-tools/src/crewai_tools/tools/brightdata_tool/brightdata_unlocker.py @@ -7,7 +7,7 @@ from pydantic import BaseModel, Field import requests -from crewai_tools.security.safe_url import validate_url +from crewai_tools.utilities.safe_path import validate_url class BrightDataConfig(BaseModel): @@ -136,7 +136,7 @@ def _run( "Content-Type": "application/json", } - validate_url(url, pin_ip=False) + validate_url(url) try: response = requests.post( self.base_url, json=payload, headers=headers, timeout=30 diff --git a/lib/crewai-tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py b/lib/crewai-tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py index b2339e51e5..dd004a2b05 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py @@ -3,7 +3,7 @@ from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.security.safe_path import validate_path +from crewai_tools.utilities.safe_path import validate_file_path class ContextualAICreateAgentSchema(BaseModel): @@ -59,7 +59,7 @@ def _run( # Upload documents document_ids = [] for doc_path in document_paths: - validate_path(doc_path) + validate_file_path(doc_path) if not os.path.exists(doc_path): raise FileNotFoundError(f"Document not found: {doc_path}") diff --git a/lib/crewai-tools/src/crewai_tools/tools/contextualai_parse_tool/contextual_parse_tool.py b/lib/crewai-tools/src/crewai_tools/tools/contextualai_parse_tool/contextual_parse_tool.py index 06e9e3174a..d9e361a304 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/contextualai_parse_tool/contextual_parse_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/contextualai_parse_tool/contextual_parse_tool.py @@ -1,7 +1,7 @@ from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.security.safe_path import validate_path +from crewai_tools.utilities.safe_path import validate_file_path class ContextualAIParseSchema(BaseModel): @@ -54,7 +54,7 @@ def _run( import requests - validate_path(file_path) + validate_file_path(file_path) if not os.path.exists(file_path): raise FileNotFoundError(f"Document not found: {file_path}") diff --git a/lib/crewai-tools/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py b/lib/crewai-tools/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py index 0fb437693d..7c9cd5bced 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py @@ -4,7 +4,7 @@ from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.security.safe_path import validate_path +from crewai_tools.utilities.safe_path import validate_file_path class FixedDirectoryReadToolSchema(BaseModel): @@ -41,7 +41,7 @@ def _run( if directory is None: raise ValueError("Directory must be provided.") - validate_path(directory) + validate_file_path(directory) if directory[-1] == "/": directory = directory[:-1] files_list = [ diff --git a/lib/crewai-tools/src/crewai_tools/tools/file_read_tool/file_read_tool.py b/lib/crewai-tools/src/crewai_tools/tools/file_read_tool/file_read_tool.py index 527f10d696..7f740f4e15 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/file_read_tool/file_read_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/file_read_tool/file_read_tool.py @@ -3,7 +3,7 @@ from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.security.safe_path import validate_path +from crewai_tools.utilities.safe_path import validate_file_path class FileReadToolSchema(BaseModel): @@ -79,7 +79,7 @@ def _run( return "Error: No file path provided. Please provide a file path either in the constructor or as an argument." try: - validate_path(file_path) + validate_file_path(file_path) with open(file_path, "r") as file: if start_line == 1 and line_count is None: return file.read() diff --git a/lib/crewai-tools/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool.py b/lib/crewai-tools/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool.py index a7e79c5178..e2f11fd289 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool.py @@ -5,7 +5,7 @@ from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.security.safe_path import validate_path +from crewai_tools.utilities.safe_path import validate_file_path class FileCompressorToolInput(BaseModel): @@ -42,7 +42,7 @@ def _run( overwrite: bool = False, format: str = "zip", ) -> str: - validate_path(input_path) + validate_file_path(input_path) if not os.path.exists(input_path): return f"Input path '{input_path}' does not exist." diff --git a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py index 5fab6d7400..ede632f6e4 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py @@ -5,7 +5,7 @@ from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, ConfigDict, Field, PrivateAttr -from crewai_tools.security.safe_url import validate_url +from crewai_tools.utilities.safe_path import validate_url try: @@ -108,7 +108,7 @@ def _run(self, url: str) -> Any: if not self._firecrawl: raise RuntimeError("FirecrawlApp not properly initialized") - url = validate_url(url, pin_ip=False) + url = validate_url(url) return self._firecrawl.crawl(url=url, poll_interval=2, **self.config) diff --git a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py index 8513406f9f..84b9c686d2 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py @@ -5,7 +5,7 @@ from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, ConfigDict, Field, PrivateAttr -from crewai_tools.security.safe_url import validate_url +from crewai_tools.utilities.safe_path import validate_url try: @@ -108,7 +108,7 @@ def _run(self, url: str) -> Any: if not self._firecrawl: raise RuntimeError("FirecrawlApp not properly initialized") - url = validate_url(url, pin_ip=False) + url = validate_url(url) return self._firecrawl.scrape(url=url, **self.config) diff --git a/lib/crewai-tools/src/crewai_tools/tools/hyperbrowser_load_tool/hyperbrowser_load_tool.py b/lib/crewai-tools/src/crewai_tools/tools/hyperbrowser_load_tool/hyperbrowser_load_tool.py index e53e75e822..0f6b5cc81b 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/hyperbrowser_load_tool/hyperbrowser_load_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/hyperbrowser_load_tool/hyperbrowser_load_tool.py @@ -4,7 +4,7 @@ from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, Field -from crewai_tools.security.safe_url import validate_url +from crewai_tools.utilities.safe_path import validate_url class HyperbrowserLoadToolSchema(BaseModel): @@ -121,7 +121,7 @@ def _run( ) from e params = self._prepare_params(params) - url = validate_url(url, pin_ip=False) + url = validate_url(url) if operation == "scrape": scrape_params = StartScrapeJobParams(url=url, **params) diff --git a/lib/crewai-tools/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py index 8841938076..daa7f6b746 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py @@ -4,7 +4,7 @@ from pydantic import BaseModel, Field import requests -from crewai_tools.security.safe_url import validate_url +from crewai_tools.utilities.safe_path import validate_url class JinaScrapeWebsiteToolInput(BaseModel): @@ -47,7 +47,7 @@ def _run(self, website_url: str | None = None) -> str: "Website URL must be provided either during initialization or execution" ) - url = validate_url(url, pin_ip=False) + url = validate_url(url) response = requests.get( f"https://r.jina.ai/{url}", headers=self.headers, timeout=15 ) diff --git a/lib/crewai-tools/src/crewai_tools/tools/ocr_tool/ocr_tool.py b/lib/crewai-tools/src/crewai_tools/tools/ocr_tool/ocr_tool.py index 5bbcf63339..d74f5a5144 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/ocr_tool/ocr_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/ocr_tool/ocr_tool.py @@ -11,7 +11,7 @@ from crewai.utilities.types import LLMMessage from pydantic import BaseModel, Field -from crewai_tools.security.safe_path import validate_path +from crewai_tools.utilities.safe_path import validate_file_path class OCRToolSchema(BaseModel): @@ -100,6 +100,6 @@ def _encode_image(image_path: str) -> str: Returns: str: Base64-encoded image data as a UTF-8 string. """ - validate_path(image_path) + validate_file_path(image_path) with open(image_path, "rb") as image_file: return base64.b64encode(image_file.read()).decode() diff --git a/lib/crewai-tools/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py b/lib/crewai-tools/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py index ce18d33dc2..9044fef403 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py +++ b/lib/crewai-tools/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py @@ -5,7 +5,7 @@ from pydantic import BaseModel, Field import requests -from crewai_tools.security.safe_url import validate_url +from crewai_tools.utilities.safe_path import validate_url try: diff --git a/lib/crewai-tools/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py index f0d2eece5f..dec62372f0 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py @@ -5,7 +5,7 @@ from pydantic import Field import requests -from crewai_tools.security.safe_url import validate_url +from crewai_tools.utilities.safe_path import validate_url try: diff --git a/lib/crewai-tools/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py index d3720d106d..6dee21a29e 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py @@ -5,7 +5,7 @@ from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, Field -from crewai_tools.security.safe_url import validate_url +from crewai_tools.utilities.safe_path import validate_url logger = logging.getLogger(__file__) @@ -74,7 +74,7 @@ def _run( ) -> str | None: from scrapfly import ScrapeConfig - url = validate_url(url, pin_ip=False) + url = validate_url(url) scrape_config = scrape_config if scrape_config is not None else {} try: response = self.scrapfly.scrape( # type: ignore[union-attr] diff --git a/lib/crewai-tools/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py index b278b86855..aeab9c7e01 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py @@ -5,7 +5,7 @@ from pydantic import BaseModel, Field import requests -from crewai_tools.security.safe_url import validate_url +from crewai_tools.utilities.safe_path import validate_url class SerperScrapeWebsiteInput(BaseModel): @@ -57,7 +57,7 @@ def _run(self, url: str, include_markdown: bool = True) -> str: # Set headers headers = {"X-API-KEY": api_key or "", "Content-Type": "application/json"} - url = validate_url(url, pin_ip=False) + url = validate_url(url) # Make the API request response = requests.post( diff --git a/lib/crewai-tools/src/crewai_tools/tools/serply_api_tool/serply_webpage_to_markdown_tool.py b/lib/crewai-tools/src/crewai_tools/tools/serply_api_tool/serply_webpage_to_markdown_tool.py index 09b980f0af..58d0f8cab9 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/serply_api_tool/serply_webpage_to_markdown_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/serply_api_tool/serply_webpage_to_markdown_tool.py @@ -5,8 +5,8 @@ from pydantic import BaseModel, Field import requests -from crewai_tools.security.safe_url import validate_url from crewai_tools.tools.rag.rag_tool import RagTool +from crewai_tools.utilities.safe_path import validate_url class SerplyWebpageToMarkdownToolSchema(BaseModel): @@ -49,7 +49,7 @@ def _run( # type: ignore[override] if self.proxy_location and not self.headers.get("X-Proxy-Location"): self.headers["X-Proxy-Location"] = self.proxy_location - validate_url(url, pin_ip=False) + validate_url(url) data = {"url": url, "method": "GET", "response_type": "markdown"} response = requests.request( "POST", diff --git a/lib/crewai-tools/src/crewai_tools/tools/vision_tool/vision_tool.py b/lib/crewai-tools/src/crewai_tools/tools/vision_tool/vision_tool.py index d2dca0fca2..d45644f88d 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/vision_tool/vision_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/vision_tool/vision_tool.py @@ -7,7 +7,7 @@ from crewai.utilities.types import LLMMessage from pydantic import BaseModel, Field, PrivateAttr, field_validator -from crewai_tools.security.safe_path import validate_path +from crewai_tools.utilities.safe_path import validate_file_path class ImagePromptSchema(BaseModel): @@ -137,6 +137,6 @@ def _encode_image(image_path: str) -> str: Returns: Base64-encoded image data """ - validate_path(image_path) + validate_file_path(image_path) with open(image_path, "rb") as image_file: return base64.b64encode(image_file.read()).decode() diff --git a/lib/crewai-tools/src/crewai_tools/tools/website_search/website_search_tool.py b/lib/crewai-tools/src/crewai_tools/tools/website_search/website_search_tool.py index 6feb02d93a..3e31b75b5b 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/website_search/website_search_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/website_search/website_search_tool.py @@ -3,8 +3,8 @@ from pydantic import BaseModel, Field from crewai_tools.rag.data_types import DataType -from crewai_tools.security.safe_url import validate_url from crewai_tools.tools.rag.rag_tool import RagTool +from crewai_tools.utilities.safe_path import validate_url class FixedWebsiteSearchToolSchema(BaseModel): @@ -38,7 +38,7 @@ def __init__(self, website: str | None = None, **kwargs: Any) -> None: self._generate_description() def add(self, website: str) -> None: # type: ignore[override] - website = validate_url(website, pin_ip=False) + website = validate_url(website) super().add(website, data_type=DataType.WEBSITE) def _run( # type: ignore[override] From eb077745da6e0cd19cdcac0f5e2621c86766acb6 Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 7 Apr 2026 10:03:07 -0700 Subject: [PATCH 5/9] refactor: move validation to security/ package + address review comments - Move safe_path.py to crewai_tools/security/; add safe_url.py re-export - Keep utilities/safe_path.py as a backwards-compat shim - Update all 21 import sites to use crewai_tools.security.safe_path - files_compressor_tool: validate output_path (user-controlled) - serper_scrape_website_tool: call validate_url() before building payload - brightdata_unlocker: validate_url() already called without assignment (no-op fix) Co-Authored-By: Claude Sonnet 4.6 --- .../src/crewai_tools/security/__init__.py | 0 .../src/crewai_tools/security/safe_path.py | 205 +++++++++++++++++ .../src/crewai_tools/security/safe_url.py | 6 + .../brightdata_tool/brightdata_unlocker.py | 2 +- .../contextual_create_agent_tool.py | 2 +- .../contextual_parse_tool.py | 2 +- .../directory_read_tool.py | 2 +- .../directory_search_tool.py | 2 +- .../tools/file_read_tool/file_read_tool.py | 2 +- .../files_compressor_tool.py | 4 +- .../firecrawl_crawl_website_tool.py | 2 +- .../firecrawl_scrape_website_tool.py | 2 +- .../hyperbrowser_load_tool.py | 2 +- .../jina_scrape_website_tool.py | 2 +- .../crewai_tools/tools/ocr_tool/ocr_tool.py | 2 +- .../src/crewai_tools/tools/rag/rag_tool.py | 2 +- .../scrape_element_from_website.py | 2 +- .../scrape_website_tool.py | 2 +- .../scrapfly_scrape_website_tool.py | 2 +- .../serper_scrape_website_tool.py | 6 +- .../serply_webpage_to_markdown_tool.py | 2 +- .../tools/vision_tool/vision_tool.py | 2 +- .../website_search/website_search_tool.py | 2 +- .../src/crewai_tools/utilities/safe_path.py | 208 +----------------- .../tests/utilities/test_safe_path.py | 2 +- 25 files changed, 244 insertions(+), 223 deletions(-) create mode 100644 lib/crewai-tools/src/crewai_tools/security/__init__.py create mode 100644 lib/crewai-tools/src/crewai_tools/security/safe_path.py create mode 100644 lib/crewai-tools/src/crewai_tools/security/safe_url.py diff --git a/lib/crewai-tools/src/crewai_tools/security/__init__.py b/lib/crewai-tools/src/crewai_tools/security/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lib/crewai-tools/src/crewai_tools/security/safe_path.py b/lib/crewai-tools/src/crewai_tools/security/safe_path.py new file mode 100644 index 0000000000..4dde68e128 --- /dev/null +++ b/lib/crewai-tools/src/crewai_tools/security/safe_path.py @@ -0,0 +1,205 @@ +"""Path and URL validation utilities for crewai-tools. + +Provides validation for file paths and URLs to prevent unauthorized +file access and server-side request forgery (SSRF) when tools accept +user-controlled or LLM-controlled inputs at runtime. + +Set CREWAI_TOOLS_ALLOW_UNSAFE_PATHS=true to bypass validation (not +recommended for production). +""" + +from __future__ import annotations + +import ipaddress +import logging +import os +import socket +from urllib.parse import urlparse + + +logger = logging.getLogger(__name__) + +_UNSAFE_PATHS_ENV = "CREWAI_TOOLS_ALLOW_UNSAFE_PATHS" + + +def _is_escape_hatch_enabled() -> bool: + """Check if the unsafe paths escape hatch is enabled.""" + return os.environ.get(_UNSAFE_PATHS_ENV, "").lower() in ("true", "1", "yes") + + +# --------------------------------------------------------------------------- +# File path validation +# --------------------------------------------------------------------------- + + +def validate_file_path(path: str, base_dir: str | None = None) -> str: + """Validate that a file path is safe to read. + + Resolves symlinks and ``..`` components, then checks that the resolved + path falls within *base_dir* (defaults to the current working directory). + + Args: + path: The file path to validate. + base_dir: Allowed root directory. Defaults to ``os.getcwd()``. + + Returns: + The resolved, validated absolute path. + + Raises: + ValueError: If the path escapes the allowed directory. + """ + if _is_escape_hatch_enabled(): + logger.warning( + "%s is enabled — skipping file path validation for: %s", + _UNSAFE_PATHS_ENV, + path, + ) + return os.path.realpath(path) + + if base_dir is None: + base_dir = os.getcwd() + + resolved_base = os.path.realpath(base_dir) + resolved_path = os.path.realpath( + os.path.join(resolved_base, path) if not os.path.isabs(path) else path + ) + + # Ensure the resolved path is within the base directory. + # When resolved_base already ends with a separator (e.g. the filesystem + # root "/"), appending os.sep would double it ("//"), so use the base + # as-is in that case. + prefix = resolved_base if resolved_base.endswith(os.sep) else resolved_base + os.sep + if not resolved_path.startswith(prefix) and resolved_path != resolved_base: + raise ValueError( + f"Path '{path}' resolves to '{resolved_path}' which is outside " + f"the allowed directory '{resolved_base}'. " + f"Set {_UNSAFE_PATHS_ENV}=true to bypass this check." + ) + + return resolved_path + + +def validate_directory_path(path: str, base_dir: str | None = None) -> str: + """Validate that a directory path is safe to read. + + Same as :func:`validate_file_path` but also checks that the path + is an existing directory. + + Args: + path: The directory path to validate. + base_dir: Allowed root directory. Defaults to ``os.getcwd()``. + + Returns: + The resolved, validated absolute path. + + Raises: + ValueError: If the path escapes the allowed directory or is not a directory. + """ + validated = validate_file_path(path, base_dir) + if not os.path.isdir(validated): + raise ValueError(f"Path '{validated}' is not a directory.") + return validated + + +# --------------------------------------------------------------------------- +# URL validation +# --------------------------------------------------------------------------- + +# Private and reserved IP ranges that should not be accessed +_BLOCKED_IPV4_NETWORKS = [ + ipaddress.ip_network("10.0.0.0/8"), + ipaddress.ip_network("172.16.0.0/12"), + ipaddress.ip_network("192.168.0.0/16"), + ipaddress.ip_network("127.0.0.0/8"), + ipaddress.ip_network("169.254.0.0/16"), # Link-local / cloud metadata + ipaddress.ip_network("0.0.0.0/32"), +] + +_BLOCKED_IPV6_NETWORKS = [ + ipaddress.ip_network("::1/128"), + ipaddress.ip_network("::/128"), + ipaddress.ip_network("fc00::/7"), # Unique local addresses + ipaddress.ip_network("fe80::/10"), # Link-local IPv6 +] + + +def _is_private_or_reserved(ip_str: str) -> bool: + """Check if an IP address is private, reserved, or otherwise unsafe.""" + try: + addr = ipaddress.ip_address(ip_str) + # Unwrap IPv4-mapped IPv6 addresses (e.g., ::ffff:127.0.0.1) to IPv4 + # so they are only checked against IPv4 networks (avoids TypeError when + # an IPv4Address is compared against an IPv6Network). + if isinstance(addr, ipaddress.IPv6Address) and addr.ipv4_mapped: + addr = addr.ipv4_mapped + networks = ( + _BLOCKED_IPV4_NETWORKS + if isinstance(addr, ipaddress.IPv4Address) + else _BLOCKED_IPV6_NETWORKS + ) + return any(addr in network for network in networks) + except ValueError: + return True # If we can't parse, block it + + +def validate_url(url: str) -> str: + """Validate that a URL is safe to fetch. + + Blocks ``file://`` scheme entirely. For ``http``/``https``, resolves + DNS and checks that the target IP is not private or reserved (prevents + SSRF to internal services and cloud metadata endpoints). + + Args: + url: The URL to validate. + + Returns: + The validated URL string. + + Raises: + ValueError: If the URL uses a blocked scheme or resolves to a + private/reserved IP address. + """ + if _is_escape_hatch_enabled(): + logger.warning( + "%s is enabled — skipping URL validation for: %s", + _UNSAFE_PATHS_ENV, + url, + ) + return url + + parsed = urlparse(url) + + # Block file:// scheme + if parsed.scheme == "file": + raise ValueError( + f"file:// URLs are not allowed: '{url}'. " + f"Use a file path instead, or set {_UNSAFE_PATHS_ENV}=true to bypass." + ) + + # Only allow http and https + if parsed.scheme not in ("http", "https"): + raise ValueError( + f"URL scheme '{parsed.scheme}' is not allowed. Only http and https are supported." + ) + + if not parsed.hostname: + raise ValueError(f"URL has no hostname: '{url}'") + + # Resolve DNS and check IPs + try: + addrinfos = socket.getaddrinfo( + parsed.hostname, parsed.port or (443 if parsed.scheme == "https" else 80) + ) + except socket.gaierror as exc: + raise ValueError(f"Could not resolve hostname: '{parsed.hostname}'") from exc + + for _family, _, _, _, sockaddr in addrinfos: + ip_str = str(sockaddr[0]) + if _is_private_or_reserved(ip_str): + raise ValueError( + f"URL '{url}' resolves to private/reserved IP {ip_str}. " + f"Access to internal networks is not allowed. " + f"Set {_UNSAFE_PATHS_ENV}=true to bypass." + ) + + return url diff --git a/lib/crewai-tools/src/crewai_tools/security/safe_url.py b/lib/crewai-tools/src/crewai_tools/security/safe_url.py new file mode 100644 index 0000000000..f95b1c4171 --- /dev/null +++ b/lib/crewai-tools/src/crewai_tools/security/safe_url.py @@ -0,0 +1,6 @@ +"""URL validation utilities — re-exports from safe_path for a cleaner API.""" + +from crewai_tools.security.safe_path import validate_url + + +__all__ = ["validate_url"] diff --git a/lib/crewai-tools/src/crewai_tools/tools/brightdata_tool/brightdata_unlocker.py b/lib/crewai-tools/src/crewai_tools/tools/brightdata_tool/brightdata_unlocker.py index 9be3fb5676..c549b12201 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/brightdata_tool/brightdata_unlocker.py +++ b/lib/crewai-tools/src/crewai_tools/tools/brightdata_tool/brightdata_unlocker.py @@ -7,7 +7,7 @@ from pydantic import BaseModel, Field import requests -from crewai_tools.utilities.safe_path import validate_url +from crewai_tools.security.safe_path import validate_url class BrightDataConfig(BaseModel): diff --git a/lib/crewai-tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py b/lib/crewai-tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py index dd004a2b05..ab334eae12 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py @@ -3,7 +3,7 @@ from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.utilities.safe_path import validate_file_path +from crewai_tools.security.safe_path import validate_file_path class ContextualAICreateAgentSchema(BaseModel): diff --git a/lib/crewai-tools/src/crewai_tools/tools/contextualai_parse_tool/contextual_parse_tool.py b/lib/crewai-tools/src/crewai_tools/tools/contextualai_parse_tool/contextual_parse_tool.py index d9e361a304..86449df0f9 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/contextualai_parse_tool/contextual_parse_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/contextualai_parse_tool/contextual_parse_tool.py @@ -1,7 +1,7 @@ from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.utilities.safe_path import validate_file_path +from crewai_tools.security.safe_path import validate_file_path class ContextualAIParseSchema(BaseModel): diff --git a/lib/crewai-tools/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py b/lib/crewai-tools/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py index 7c9cd5bced..3b1f73a54d 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py @@ -4,7 +4,7 @@ from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.utilities.safe_path import validate_file_path +from crewai_tools.security.safe_path import validate_file_path class FixedDirectoryReadToolSchema(BaseModel): diff --git a/lib/crewai-tools/src/crewai_tools/tools/directory_search_tool/directory_search_tool.py b/lib/crewai-tools/src/crewai_tools/tools/directory_search_tool/directory_search_tool.py index f17c4699af..aabc196214 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/directory_search_tool/directory_search_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/directory_search_tool/directory_search_tool.py @@ -3,8 +3,8 @@ from pydantic import BaseModel, Field from crewai_tools.rag.data_types import DataType +from crewai_tools.security.safe_path import validate_directory_path from crewai_tools.tools.rag.rag_tool import RagTool -from crewai_tools.utilities.safe_path import validate_directory_path class FixedDirectorySearchToolSchema(BaseModel): diff --git a/lib/crewai-tools/src/crewai_tools/tools/file_read_tool/file_read_tool.py b/lib/crewai-tools/src/crewai_tools/tools/file_read_tool/file_read_tool.py index 7f740f4e15..644b5a703e 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/file_read_tool/file_read_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/file_read_tool/file_read_tool.py @@ -3,7 +3,7 @@ from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.utilities.safe_path import validate_file_path +from crewai_tools.security.safe_path import validate_file_path class FileReadToolSchema(BaseModel): diff --git a/lib/crewai-tools/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool.py b/lib/crewai-tools/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool.py index e2f11fd289..65be8f071f 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool.py @@ -5,7 +5,7 @@ from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.utilities.safe_path import validate_file_path +from crewai_tools.security.safe_path import validate_file_path class FileCompressorToolInput(BaseModel): @@ -49,6 +49,8 @@ def _run( if not output_path: output_path = self._generate_output_path(input_path, format) + validate_file_path(output_path) + format_extension = { "zip": ".zip", "tar": ".tar", diff --git a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py index ede632f6e4..47e98135ca 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py @@ -5,7 +5,7 @@ from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, ConfigDict, Field, PrivateAttr -from crewai_tools.utilities.safe_path import validate_url +from crewai_tools.security.safe_path import validate_url try: diff --git a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py index 84b9c686d2..35b0029612 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py @@ -5,7 +5,7 @@ from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, ConfigDict, Field, PrivateAttr -from crewai_tools.utilities.safe_path import validate_url +from crewai_tools.security.safe_path import validate_url try: diff --git a/lib/crewai-tools/src/crewai_tools/tools/hyperbrowser_load_tool/hyperbrowser_load_tool.py b/lib/crewai-tools/src/crewai_tools/tools/hyperbrowser_load_tool/hyperbrowser_load_tool.py index 0f6b5cc81b..50a752d196 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/hyperbrowser_load_tool/hyperbrowser_load_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/hyperbrowser_load_tool/hyperbrowser_load_tool.py @@ -4,7 +4,7 @@ from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, Field -from crewai_tools.utilities.safe_path import validate_url +from crewai_tools.security.safe_path import validate_url class HyperbrowserLoadToolSchema(BaseModel): diff --git a/lib/crewai-tools/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py index daa7f6b746..6762b60e8c 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py @@ -4,7 +4,7 @@ from pydantic import BaseModel, Field import requests -from crewai_tools.utilities.safe_path import validate_url +from crewai_tools.security.safe_path import validate_url class JinaScrapeWebsiteToolInput(BaseModel): diff --git a/lib/crewai-tools/src/crewai_tools/tools/ocr_tool/ocr_tool.py b/lib/crewai-tools/src/crewai_tools/tools/ocr_tool/ocr_tool.py index d74f5a5144..c8dd1b7ca1 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/ocr_tool/ocr_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/ocr_tool/ocr_tool.py @@ -11,7 +11,7 @@ from crewai.utilities.types import LLMMessage from pydantic import BaseModel, Field -from crewai_tools.utilities.safe_path import validate_file_path +from crewai_tools.security.safe_path import validate_file_path class OCRToolSchema(BaseModel): diff --git a/lib/crewai-tools/src/crewai_tools/tools/rag/rag_tool.py b/lib/crewai-tools/src/crewai_tools/tools/rag/rag_tool.py index eb7e9cefd0..24d7f1f18a 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/rag/rag_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/rag/rag_tool.py @@ -251,7 +251,7 @@ def add( # unauthorized file reads and SSRF. from urllib.parse import urlparse - from crewai_tools.utilities.safe_path import validate_file_path, validate_url + from crewai_tools.security.safe_path import validate_file_path, validate_url def _check_url(value: str, label: str) -> None: try: diff --git a/lib/crewai-tools/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py b/lib/crewai-tools/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py index 9044fef403..7bba12b72f 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py +++ b/lib/crewai-tools/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py @@ -5,7 +5,7 @@ from pydantic import BaseModel, Field import requests -from crewai_tools.utilities.safe_path import validate_url +from crewai_tools.security.safe_path import validate_url try: diff --git a/lib/crewai-tools/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py index dec62372f0..d297dfe086 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py @@ -5,7 +5,7 @@ from pydantic import Field import requests -from crewai_tools.utilities.safe_path import validate_url +from crewai_tools.security.safe_path import validate_url try: diff --git a/lib/crewai-tools/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py index 6dee21a29e..932b8dc7a3 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py @@ -5,7 +5,7 @@ from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, Field -from crewai_tools.utilities.safe_path import validate_url +from crewai_tools.security.safe_path import validate_url logger = logging.getLogger(__file__) diff --git a/lib/crewai-tools/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py index aeab9c7e01..9ed0ee577c 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py @@ -5,7 +5,7 @@ from pydantic import BaseModel, Field import requests -from crewai_tools.utilities.safe_path import validate_url +from crewai_tools.security.safe_path import validate_url class SerperScrapeWebsiteInput(BaseModel): @@ -51,14 +51,14 @@ def _run(self, url: str, include_markdown: bool = True) -> str: # Get API key from environment variable for security api_key = os.getenv("SERPER_API_KEY") + validate_url(url) + # Prepare the payload payload = json.dumps({"url": url, "includeMarkdown": include_markdown}) # Set headers headers = {"X-API-KEY": api_key or "", "Content-Type": "application/json"} - url = validate_url(url) - # Make the API request response = requests.post( api_url, diff --git a/lib/crewai-tools/src/crewai_tools/tools/serply_api_tool/serply_webpage_to_markdown_tool.py b/lib/crewai-tools/src/crewai_tools/tools/serply_api_tool/serply_webpage_to_markdown_tool.py index 58d0f8cab9..4ace8b46a3 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/serply_api_tool/serply_webpage_to_markdown_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/serply_api_tool/serply_webpage_to_markdown_tool.py @@ -5,8 +5,8 @@ from pydantic import BaseModel, Field import requests +from crewai_tools.security.safe_path import validate_url from crewai_tools.tools.rag.rag_tool import RagTool -from crewai_tools.utilities.safe_path import validate_url class SerplyWebpageToMarkdownToolSchema(BaseModel): diff --git a/lib/crewai-tools/src/crewai_tools/tools/vision_tool/vision_tool.py b/lib/crewai-tools/src/crewai_tools/tools/vision_tool/vision_tool.py index d45644f88d..5412a08f34 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/vision_tool/vision_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/vision_tool/vision_tool.py @@ -7,7 +7,7 @@ from crewai.utilities.types import LLMMessage from pydantic import BaseModel, Field, PrivateAttr, field_validator -from crewai_tools.utilities.safe_path import validate_file_path +from crewai_tools.security.safe_path import validate_file_path class ImagePromptSchema(BaseModel): diff --git a/lib/crewai-tools/src/crewai_tools/tools/website_search/website_search_tool.py b/lib/crewai-tools/src/crewai_tools/tools/website_search/website_search_tool.py index 3e31b75b5b..62a6c1d702 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/website_search/website_search_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/website_search/website_search_tool.py @@ -3,8 +3,8 @@ from pydantic import BaseModel, Field from crewai_tools.rag.data_types import DataType +from crewai_tools.security.safe_path import validate_url from crewai_tools.tools.rag.rag_tool import RagTool -from crewai_tools.utilities.safe_path import validate_url class FixedWebsiteSearchToolSchema(BaseModel): diff --git a/lib/crewai-tools/src/crewai_tools/utilities/safe_path.py b/lib/crewai-tools/src/crewai_tools/utilities/safe_path.py index 4dde68e128..6c19c829f5 100644 --- a/lib/crewai-tools/src/crewai_tools/utilities/safe_path.py +++ b/lib/crewai-tools/src/crewai_tools/utilities/safe_path.py @@ -1,205 +1,13 @@ -"""Path and URL validation utilities for crewai-tools. +"""Compatibility shim — re-exports from crewai_tools.security.safe_path. -Provides validation for file paths and URLs to prevent unauthorized -file access and server-side request forgery (SSRF) when tools accept -user-controlled or LLM-controlled inputs at runtime. - -Set CREWAI_TOOLS_ALLOW_UNSAFE_PATHS=true to bypass validation (not -recommended for production). +Import from crewai_tools.security.safe_path instead. """ -from __future__ import annotations - -import ipaddress -import logging -import os -import socket -from urllib.parse import urlparse - - -logger = logging.getLogger(__name__) - -_UNSAFE_PATHS_ENV = "CREWAI_TOOLS_ALLOW_UNSAFE_PATHS" - - -def _is_escape_hatch_enabled() -> bool: - """Check if the unsafe paths escape hatch is enabled.""" - return os.environ.get(_UNSAFE_PATHS_ENV, "").lower() in ("true", "1", "yes") - - -# --------------------------------------------------------------------------- -# File path validation -# --------------------------------------------------------------------------- - - -def validate_file_path(path: str, base_dir: str | None = None) -> str: - """Validate that a file path is safe to read. - - Resolves symlinks and ``..`` components, then checks that the resolved - path falls within *base_dir* (defaults to the current working directory). - - Args: - path: The file path to validate. - base_dir: Allowed root directory. Defaults to ``os.getcwd()``. - - Returns: - The resolved, validated absolute path. - - Raises: - ValueError: If the path escapes the allowed directory. - """ - if _is_escape_hatch_enabled(): - logger.warning( - "%s is enabled — skipping file path validation for: %s", - _UNSAFE_PATHS_ENV, - path, - ) - return os.path.realpath(path) - - if base_dir is None: - base_dir = os.getcwd() - - resolved_base = os.path.realpath(base_dir) - resolved_path = os.path.realpath( - os.path.join(resolved_base, path) if not os.path.isabs(path) else path - ) - - # Ensure the resolved path is within the base directory. - # When resolved_base already ends with a separator (e.g. the filesystem - # root "/"), appending os.sep would double it ("//"), so use the base - # as-is in that case. - prefix = resolved_base if resolved_base.endswith(os.sep) else resolved_base + os.sep - if not resolved_path.startswith(prefix) and resolved_path != resolved_base: - raise ValueError( - f"Path '{path}' resolves to '{resolved_path}' which is outside " - f"the allowed directory '{resolved_base}'. " - f"Set {_UNSAFE_PATHS_ENV}=true to bypass this check." - ) - - return resolved_path - - -def validate_directory_path(path: str, base_dir: str | None = None) -> str: - """Validate that a directory path is safe to read. - - Same as :func:`validate_file_path` but also checks that the path - is an existing directory. - - Args: - path: The directory path to validate. - base_dir: Allowed root directory. Defaults to ``os.getcwd()``. - - Returns: - The resolved, validated absolute path. - - Raises: - ValueError: If the path escapes the allowed directory or is not a directory. - """ - validated = validate_file_path(path, base_dir) - if not os.path.isdir(validated): - raise ValueError(f"Path '{validated}' is not a directory.") - return validated - - -# --------------------------------------------------------------------------- -# URL validation -# --------------------------------------------------------------------------- - -# Private and reserved IP ranges that should not be accessed -_BLOCKED_IPV4_NETWORKS = [ - ipaddress.ip_network("10.0.0.0/8"), - ipaddress.ip_network("172.16.0.0/12"), - ipaddress.ip_network("192.168.0.0/16"), - ipaddress.ip_network("127.0.0.0/8"), - ipaddress.ip_network("169.254.0.0/16"), # Link-local / cloud metadata - ipaddress.ip_network("0.0.0.0/32"), -] - -_BLOCKED_IPV6_NETWORKS = [ - ipaddress.ip_network("::1/128"), - ipaddress.ip_network("::/128"), - ipaddress.ip_network("fc00::/7"), # Unique local addresses - ipaddress.ip_network("fe80::/10"), # Link-local IPv6 -] - - -def _is_private_or_reserved(ip_str: str) -> bool: - """Check if an IP address is private, reserved, or otherwise unsafe.""" - try: - addr = ipaddress.ip_address(ip_str) - # Unwrap IPv4-mapped IPv6 addresses (e.g., ::ffff:127.0.0.1) to IPv4 - # so they are only checked against IPv4 networks (avoids TypeError when - # an IPv4Address is compared against an IPv6Network). - if isinstance(addr, ipaddress.IPv6Address) and addr.ipv4_mapped: - addr = addr.ipv4_mapped - networks = ( - _BLOCKED_IPV4_NETWORKS - if isinstance(addr, ipaddress.IPv4Address) - else _BLOCKED_IPV6_NETWORKS - ) - return any(addr in network for network in networks) - except ValueError: - return True # If we can't parse, block it - - -def validate_url(url: str) -> str: - """Validate that a URL is safe to fetch. - - Blocks ``file://`` scheme entirely. For ``http``/``https``, resolves - DNS and checks that the target IP is not private or reserved (prevents - SSRF to internal services and cloud metadata endpoints). - - Args: - url: The URL to validate. - - Returns: - The validated URL string. - - Raises: - ValueError: If the URL uses a blocked scheme or resolves to a - private/reserved IP address. - """ - if _is_escape_hatch_enabled(): - logger.warning( - "%s is enabled — skipping URL validation for: %s", - _UNSAFE_PATHS_ENV, - url, - ) - return url - - parsed = urlparse(url) - - # Block file:// scheme - if parsed.scheme == "file": - raise ValueError( - f"file:// URLs are not allowed: '{url}'. " - f"Use a file path instead, or set {_UNSAFE_PATHS_ENV}=true to bypass." - ) - - # Only allow http and https - if parsed.scheme not in ("http", "https"): - raise ValueError( - f"URL scheme '{parsed.scheme}' is not allowed. Only http and https are supported." - ) - - if not parsed.hostname: - raise ValueError(f"URL has no hostname: '{url}'") - - # Resolve DNS and check IPs - try: - addrinfos = socket.getaddrinfo( - parsed.hostname, parsed.port or (443 if parsed.scheme == "https" else 80) - ) - except socket.gaierror as exc: - raise ValueError(f"Could not resolve hostname: '{parsed.hostname}'") from exc +from crewai_tools.security.safe_path import ( + validate_directory_path, + validate_file_path, + validate_url, +) - for _family, _, _, _, sockaddr in addrinfos: - ip_str = str(sockaddr[0]) - if _is_private_or_reserved(ip_str): - raise ValueError( - f"URL '{url}' resolves to private/reserved IP {ip_str}. " - f"Access to internal networks is not allowed. " - f"Set {_UNSAFE_PATHS_ENV}=true to bypass." - ) - return url +__all__ = ["validate_directory_path", "validate_file_path", "validate_url"] diff --git a/lib/crewai-tools/tests/utilities/test_safe_path.py b/lib/crewai-tools/tests/utilities/test_safe_path.py index 83e247292e..4fb5d1ec7a 100644 --- a/lib/crewai-tools/tests/utilities/test_safe_path.py +++ b/lib/crewai-tools/tests/utilities/test_safe_path.py @@ -6,7 +6,7 @@ import pytest -from crewai_tools.utilities.safe_path import ( +from crewai_tools.security.safe_path import ( validate_directory_path, validate_file_path, validate_url, From d8a46bc411b4e014d066fe0c84fe06e269014c02 Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 7 Apr 2026 10:04:48 -0700 Subject: [PATCH 6/9] refactor: move validation to security/ package, keep utilities/ as compat shim - security/safe_path.py is the canonical location for all validation - utilities/safe_path.py re-exports for backward compatibility - All tool imports already point to security.safe_path - All review comments already addressed in prior commits --- lib/crewai-tools/src/crewai_tools/utilities/safe_path.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lib/crewai-tools/src/crewai_tools/utilities/safe_path.py b/lib/crewai-tools/src/crewai_tools/utilities/safe_path.py index 6c19c829f5..f3ec120fde 100644 --- a/lib/crewai-tools/src/crewai_tools/utilities/safe_path.py +++ b/lib/crewai-tools/src/crewai_tools/utilities/safe_path.py @@ -1,7 +1,4 @@ -"""Compatibility shim — re-exports from crewai_tools.security.safe_path. - -Import from crewai_tools.security.safe_path instead. -""" +"""Backward-compatible re-export from crewai_tools.security.safe_path.""" from crewai_tools.security.safe_path import ( validate_directory_path, From 2b39baa4dcacdf1b9fec3fe9eec9b19e77fa355c Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 7 Apr 2026 10:18:04 -0700 Subject: [PATCH 7/9] fix: move validation outside try/except blocks, use correct directory validator Co-Authored-By: Claude Sonnet 4.6 --- .../contextual_create_agent_tool.py | 3 ++- .../tools/contextualai_parse_tool/contextual_parse_tool.py | 2 +- .../tools/directory_read_tool/directory_read_tool.py | 4 ++-- .../src/crewai_tools/tools/file_read_tool/file_read_tool.py | 2 +- .../serper_scrape_website_tool/serper_scrape_website_tool.py | 3 +-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/crewai-tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py b/lib/crewai-tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py index ab334eae12..324af88719 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py @@ -49,6 +49,8 @@ def _run( document_paths: list[str], ) -> str: """Create a complete RAG pipeline with documents.""" + for doc_path in document_paths: + validate_file_path(doc_path) try: import os @@ -59,7 +61,6 @@ def _run( # Upload documents document_ids = [] for doc_path in document_paths: - validate_file_path(doc_path) if not os.path.exists(doc_path): raise FileNotFoundError(f"Document not found: {doc_path}") diff --git a/lib/crewai-tools/src/crewai_tools/tools/contextualai_parse_tool/contextual_parse_tool.py b/lib/crewai-tools/src/crewai_tools/tools/contextualai_parse_tool/contextual_parse_tool.py index 86449df0f9..c81af3d9fa 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/contextualai_parse_tool/contextual_parse_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/contextualai_parse_tool/contextual_parse_tool.py @@ -47,6 +47,7 @@ def _run( """Parse a document using Contextual AI's parser.""" if output_types is None: output_types = ["markdown-per-page"] + validate_file_path(file_path) try: import json import os @@ -54,7 +55,6 @@ def _run( import requests - validate_file_path(file_path) if not os.path.exists(file_path): raise FileNotFoundError(f"Document not found: {file_path}") diff --git a/lib/crewai-tools/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py b/lib/crewai-tools/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py index 3b1f73a54d..cd65047519 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py @@ -4,7 +4,7 @@ from crewai.tools import BaseTool from pydantic import BaseModel, Field -from crewai_tools.security.safe_path import validate_file_path +from crewai_tools.security.safe_path import validate_directory_path class FixedDirectoryReadToolSchema(BaseModel): @@ -41,7 +41,7 @@ def _run( if directory is None: raise ValueError("Directory must be provided.") - validate_file_path(directory) + validate_directory_path(directory) if directory[-1] == "/": directory = directory[:-1] files_list = [ diff --git a/lib/crewai-tools/src/crewai_tools/tools/file_read_tool/file_read_tool.py b/lib/crewai-tools/src/crewai_tools/tools/file_read_tool/file_read_tool.py index 644b5a703e..4424717cd2 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/file_read_tool/file_read_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/file_read_tool/file_read_tool.py @@ -78,8 +78,8 @@ def _run( if file_path is None: return "Error: No file path provided. Please provide a file path either in the constructor or as an argument." + validate_file_path(file_path) try: - validate_file_path(file_path) with open(file_path, "r") as file: if start_line == 1 and line_count is None: return file.read() diff --git a/lib/crewai-tools/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py index 9ed0ee577c..55521104ba 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py @@ -44,6 +44,7 @@ def _run(self, url: str, include_markdown: bool = True) -> str: Returns: Scraped website content as a string """ + validate_url(url) try: # Serper API endpoint api_url = "https://scrape.serper.dev" @@ -51,8 +52,6 @@ def _run(self, url: str, include_markdown: bool = True) -> str: # Get API key from environment variable for security api_key = os.getenv("SERPER_API_KEY") - validate_url(url) - # Prepare the payload payload = json.dumps({"url": url, "includeMarkdown": include_markdown}) From f3a6924ffa2ba4a660cf2fe406cac459dcdecf82 Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 7 Apr 2026 10:30:58 -0700 Subject: [PATCH 8/9] fix: use resolved paths from validation to prevent symlink TOCTOU, remove unused safe_url.py --- lib/crewai-tools/src/crewai_tools/security/safe_url.py | 6 ------ .../contextual_create_agent_tool.py | 2 +- .../tools/contextualai_parse_tool/contextual_parse_tool.py | 2 +- .../tools/directory_read_tool/directory_read_tool.py | 2 +- .../tools/directory_search_tool/directory_search_tool.py | 2 +- .../src/crewai_tools/tools/file_read_tool/file_read_tool.py | 2 +- .../tools/files_compressor_tool/files_compressor_tool.py | 4 ++-- .../src/crewai_tools/tools/ocr_tool/ocr_tool.py | 2 +- .../src/crewai_tools/tools/vision_tool/vision_tool.py | 2 +- 9 files changed, 9 insertions(+), 15 deletions(-) delete mode 100644 lib/crewai-tools/src/crewai_tools/security/safe_url.py diff --git a/lib/crewai-tools/src/crewai_tools/security/safe_url.py b/lib/crewai-tools/src/crewai_tools/security/safe_url.py deleted file mode 100644 index f95b1c4171..0000000000 --- a/lib/crewai-tools/src/crewai_tools/security/safe_url.py +++ /dev/null @@ -1,6 +0,0 @@ -"""URL validation utilities — re-exports from safe_path for a cleaner API.""" - -from crewai_tools.security.safe_path import validate_url - - -__all__ = ["validate_url"] diff --git a/lib/crewai-tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py b/lib/crewai-tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py index 324af88719..c3e16dfabe 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py @@ -50,7 +50,7 @@ def _run( ) -> str: """Create a complete RAG pipeline with documents.""" for doc_path in document_paths: - validate_file_path(doc_path) + doc_path = validate_file_path(doc_path) try: import os diff --git a/lib/crewai-tools/src/crewai_tools/tools/contextualai_parse_tool/contextual_parse_tool.py b/lib/crewai-tools/src/crewai_tools/tools/contextualai_parse_tool/contextual_parse_tool.py index c81af3d9fa..99ef715144 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/contextualai_parse_tool/contextual_parse_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/contextualai_parse_tool/contextual_parse_tool.py @@ -47,7 +47,7 @@ def _run( """Parse a document using Contextual AI's parser.""" if output_types is None: output_types = ["markdown-per-page"] - validate_file_path(file_path) + file_path = validate_file_path(file_path) try: import json import os diff --git a/lib/crewai-tools/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py b/lib/crewai-tools/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py index cd65047519..cd5b31bcca 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py @@ -41,7 +41,7 @@ def _run( if directory is None: raise ValueError("Directory must be provided.") - validate_directory_path(directory) + directory = validate_directory_path(directory) if directory[-1] == "/": directory = directory[:-1] files_list = [ diff --git a/lib/crewai-tools/src/crewai_tools/tools/directory_search_tool/directory_search_tool.py b/lib/crewai-tools/src/crewai_tools/tools/directory_search_tool/directory_search_tool.py index aabc196214..3f6f278aee 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/directory_search_tool/directory_search_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/directory_search_tool/directory_search_tool.py @@ -38,7 +38,7 @@ def __init__(self, directory: str | None = None, **kwargs: Any) -> None: self._generate_description() def add(self, directory: str) -> None: # type: ignore[override] - validate_directory_path(directory) + directory = validate_directory_path(directory) super().add(directory, data_type=DataType.DIRECTORY) def _run( # type: ignore[override] diff --git a/lib/crewai-tools/src/crewai_tools/tools/file_read_tool/file_read_tool.py b/lib/crewai-tools/src/crewai_tools/tools/file_read_tool/file_read_tool.py index 4424717cd2..428d19d7d5 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/file_read_tool/file_read_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/file_read_tool/file_read_tool.py @@ -78,7 +78,7 @@ def _run( if file_path is None: return "Error: No file path provided. Please provide a file path either in the constructor or as an argument." - validate_file_path(file_path) + file_path = validate_file_path(file_path) try: with open(file_path, "r") as file: if start_line == 1 and line_count is None: diff --git a/lib/crewai-tools/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool.py b/lib/crewai-tools/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool.py index 65be8f071f..8a759263a6 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool.py @@ -42,14 +42,14 @@ def _run( overwrite: bool = False, format: str = "zip", ) -> str: - validate_file_path(input_path) + input_path = validate_file_path(input_path) if not os.path.exists(input_path): return f"Input path '{input_path}' does not exist." if not output_path: output_path = self._generate_output_path(input_path, format) - validate_file_path(output_path) + output_path = validate_file_path(output_path) format_extension = { "zip": ".zip", diff --git a/lib/crewai-tools/src/crewai_tools/tools/ocr_tool/ocr_tool.py b/lib/crewai-tools/src/crewai_tools/tools/ocr_tool/ocr_tool.py index c8dd1b7ca1..9a21062333 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/ocr_tool/ocr_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/ocr_tool/ocr_tool.py @@ -100,6 +100,6 @@ def _encode_image(image_path: str) -> str: Returns: str: Base64-encoded image data as a UTF-8 string. """ - validate_file_path(image_path) + image_path = validate_file_path(image_path) with open(image_path, "rb") as image_file: return base64.b64encode(image_file.read()).decode() diff --git a/lib/crewai-tools/src/crewai_tools/tools/vision_tool/vision_tool.py b/lib/crewai-tools/src/crewai_tools/tools/vision_tool/vision_tool.py index 5412a08f34..24904c0f6b 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/vision_tool/vision_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/vision_tool/vision_tool.py @@ -137,6 +137,6 @@ def _encode_image(image_path: str) -> str: Returns: Base64-encoded image data """ - validate_file_path(image_path) + image_path = validate_file_path(image_path) with open(image_path, "rb") as image_file: return base64.b64encode(image_file.read()).decode() From 4f5a9f71110720dc98a651c9ccaef5f191d72905 Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 7 Apr 2026 10:32:56 -0700 Subject: [PATCH 9/9] fix: use resolved paths from validation to prevent symlink TOCTOU, remove unused safe_url.py Co-Authored-By: Claude Sonnet 4.6 --- .../contextual_create_agent_tool.py | 5 ++-- .../src/crewai_tools/tools/rag/rag_tool.py | 23 ++++++++++++++----- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/lib/crewai-tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py b/lib/crewai-tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py index c3e16dfabe..59bc0d4432 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py @@ -49,8 +49,7 @@ def _run( document_paths: list[str], ) -> str: """Create a complete RAG pipeline with documents.""" - for doc_path in document_paths: - doc_path = validate_file_path(doc_path) + resolved_paths = [validate_file_path(doc_path) for doc_path in document_paths] try: import os @@ -60,7 +59,7 @@ def _run( # Upload documents document_ids = [] - for doc_path in document_paths: + for doc_path in resolved_paths: if not os.path.exists(doc_path): raise FileNotFoundError(f"Document not found: {doc_path}") diff --git a/lib/crewai-tools/src/crewai_tools/tools/rag/rag_tool.py b/lib/crewai-tools/src/crewai_tools/tools/rag/rag_tool.py index 24d7f1f18a..8099443e2d 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/rag/rag_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/rag/rag_tool.py @@ -259,9 +259,9 @@ def _check_url(value: str, label: str) -> None: except ValueError as e: raise ValueError(f"Blocked unsafe {label}: {e}") from e - def _check_path(value: str, label: str) -> None: + def _check_path(value: str, label: str) -> str: try: - validate_file_path(value) + return validate_file_path(value) except ValueError as e: raise ValueError(f"Blocked unsafe {label}: {e}") from e @@ -298,21 +298,32 @@ def _check_path(value: str, label: str) -> None: or os.path.isabs(source_ref) ): try: - validate_file_path(source_ref) + resolved_ref = validate_file_path(source_ref) except ValueError as e: raise ValueError(f"Blocked unsafe file path: {e}") from e + # Use the resolved path to prevent symlink TOCTOU + if isinstance(arg, dict): + arg = {**arg} + if "source" in arg: + arg["source"] = resolved_ref + elif "content" in arg: + arg["content"] = resolved_ref + else: + arg = resolved_ref validated_args.append(arg) # Validate keyword path/URL arguments — these are equally user-controlled # and must not bypass the checks applied to positional args. if "path" in kwargs and kwargs.get("path") is not None: - _check_path(str(kwargs["path"]), "path") + kwargs["path"] = _check_path(str(kwargs["path"]), "path") if "file_path" in kwargs and kwargs.get("file_path") is not None: - _check_path(str(kwargs["file_path"]), "file_path") + kwargs["file_path"] = _check_path(str(kwargs["file_path"]), "file_path") if "directory_path" in kwargs and kwargs.get("directory_path") is not None: - _check_path(str(kwargs["directory_path"]), "directory_path") + kwargs["directory_path"] = _check_path( + str(kwargs["directory_path"]), "directory_path" + ) if "url" in kwargs and kwargs.get("url") is not None: _check_url(str(kwargs["url"]), "url")