diff --git a/lib/cli/src/crewai_cli/experimental/skills/main.py b/lib/cli/src/crewai_cli/experimental/skills/main.py index 81f9b3fc55..612b85d820 100644 --- a/lib/cli/src/crewai_cli/experimental/skills/main.py +++ b/lib/cli/src/crewai_cli/experimental/skills/main.py @@ -378,12 +378,40 @@ def _read_version(self, skill_md: Path) -> str | None: def _safe_extractall(tf: tarfile.TarFile, dest: Path) -> None: - """Path-traversal-safe extraction for Python < 3.12.""" + """Path-traversal-safe extraction for Python versions without tar filters. + + Validates both the member's own path and, for symlink/hardlink members, + the link target. Without the link-target check a malicious archive can + plant a symlink that escapes ``dest`` (e.g. ``link -> /home/user/.ssh``) + followed by a regular member written *through* that link + (``link/authorized_keys``), escaping ``dest`` even though every member + name resolves inside it. This mirrors the protection that + ``tarfile.extractall(..., filter="data")`` provides when available. + """ dest_resolved = dest.resolve() for member in tf.getmembers(): member_path = (dest / member.name).resolve() if not member_path.is_relative_to(dest_resolved): raise ValueError(f"Blocked path traversal attempt: {member.name!r}") + if not (member.isfile() or member.isdir() or member.issym() or member.islnk()): + raise ValueError(f"Blocked unsupported tar member: {member.name!r}") + if member.issym() or member.islnk(): + link_target = member.linkname + # Absolute link targets always escape the destination. + if os.path.isabs(link_target): + raise ValueError( + f"Blocked link target escaping destination: " + f"{member.name!r} -> {link_target!r}" + ) + # Hardlink names are relative to the archive root; symlink + # targets are relative to the member's own directory. + anchor = dest if member.islnk() else (dest / member.name).parent + resolved_target = (anchor / link_target).resolve() + if not resolved_target.is_relative_to(dest_resolved): + raise ValueError( + f"Blocked link target escaping destination: " + f"{member.name!r} -> {link_target!r}" + ) tf.extractall(dest) # noqa: S202 diff --git a/lib/cli/tests/skills/test_safe_extract.py b/lib/cli/tests/skills/test_safe_extract.py new file mode 100644 index 0000000000..f1083f5fa8 --- /dev/null +++ b/lib/cli/tests/skills/test_safe_extract.py @@ -0,0 +1,140 @@ +"""Regression tests for path-traversal-safe archive extraction. + +Guards against symlink/hardlink-based path traversal in the fallback used on +Python versions without tarfile extraction filters. The filtered path relies on +`tarfile.extractall(..., filter="data")`; the fallback must provide the same +protection by validating link targets, not just member names. +""" + +from __future__ import annotations + +import io +import tarfile +from pathlib import Path + +import pytest + +from crewai_cli.experimental.skills.main import _safe_extractall + + +def _tar_from_members(build) -> tarfile.TarFile: + """Build an in-memory tar archive via `build(tf)` and return it for reading.""" + buf = io.BytesIO() + with tarfile.open(fileobj=buf, mode="w") as tf: + build(tf) + buf.seek(0) + return tarfile.open(fileobj=buf, mode="r") + + +def test_blocks_symlink_escaping_destination(tmp_path: Path) -> None: + """A symlink whose target escapes dest, plus a file written through it, + must be rejected before anything is extracted.""" + outside = tmp_path / "outside" + outside.mkdir() + dest = tmp_path / "dest" + dest.mkdir() + + def build(tf: tarfile.TarFile) -> None: + link = tarfile.TarInfo("link") + link.type = tarfile.SYMTYPE + link.linkname = str(outside) # absolute path outside dest + tf.addfile(link) + payload = b"pwned" + info = tarfile.TarInfo("link/evil.txt") + info.size = len(payload) + tf.addfile(info, io.BytesIO(payload)) + + with _tar_from_members(build) as tf: + with pytest.raises(ValueError, match="escaping destination"): + _safe_extractall(tf, dest) + + assert not (outside / "evil.txt").exists() + + +def test_blocks_relative_symlink_escaping_destination(tmp_path: Path) -> None: + """A relative symlink (../..) that escapes dest is also rejected.""" + dest = tmp_path / "dest" + dest.mkdir() + + def build(tf: tarfile.TarFile) -> None: + link = tarfile.TarInfo("sub/link") + link.type = tarfile.SYMTYPE + link.linkname = "../../outside" # escapes dest from sub/ + tf.addfile(link) + + with _tar_from_members(build) as tf: + with pytest.raises(ValueError, match="escaping destination"): + _safe_extractall(tf, dest) + + +def test_blocks_hardlink_escaping_destination(tmp_path: Path) -> None: + """A hardlink whose target escapes dest is rejected.""" + dest = tmp_path / "dest" + dest.mkdir() + + def build(tf: tarfile.TarFile) -> None: + link = tarfile.TarInfo("escape") + link.type = tarfile.LNKTYPE + link.linkname = "../outside.txt" # escapes archive root + tf.addfile(link) + + with _tar_from_members(build) as tf: + with pytest.raises(ValueError, match="escaping destination"): + _safe_extractall(tf, dest) + + +def test_blocks_special_tar_member(tmp_path: Path) -> None: + """Special tar members such as FIFOs are rejected.""" + dest = tmp_path / "dest" + dest.mkdir() + + def build(tf: tarfile.TarFile) -> None: + fifo = tarfile.TarInfo("pipe") + fifo.type = tarfile.FIFOTYPE + tf.addfile(fifo) + + with _tar_from_members(build) as tf: + with pytest.raises(ValueError, match="unsupported tar member"): + _safe_extractall(tf, dest) + + +def test_allows_benign_relative_symlink(tmp_path: Path) -> None: + """A symlink that stays within dest is permitted.""" + dest = tmp_path / "dest" + dest.mkdir() + + def build(tf: tarfile.TarFile) -> None: + payload = b"hi" + info = tarfile.TarInfo("real.txt") + info.size = len(payload) + tf.addfile(info, io.BytesIO(payload)) + link = tarfile.TarInfo("alias.txt") + link.type = tarfile.SYMTYPE + link.linkname = "real.txt" # stays inside dest + tf.addfile(link) + + with _tar_from_members(build) as tf: + _safe_extractall(tf, dest) + + assert (dest / "real.txt").read_bytes() == b"hi" + assert (dest / "alias.txt").is_symlink() + assert (dest / "alias.txt").readlink() == Path("real.txt") + + +def test_allows_benign_archive(tmp_path: Path) -> None: + """An ordinary archive of regular files extracts correctly.""" + dest = tmp_path / "dest" + dest.mkdir() + + def build(tf: tarfile.TarFile) -> None: + for name, body in (("SKILL.md", b"# skill"), ("scripts/run.py", b"print(1)")): + payload = body + info = tarfile.TarInfo(name) + info.size = len(payload) + tf.addfile(info, io.BytesIO(payload)) + + with _tar_from_members(build) as tf: + _safe_extractall(tf, dest) + + assert (dest / "SKILL.md").read_bytes() == b"# skill" + assert (dest / "scripts" / "run.py").read_bytes() == b"print(1)" diff --git a/lib/crewai/src/crewai/experimental/skills/cache.py b/lib/crewai/src/crewai/experimental/skills/cache.py index e9752c4e8d..065c2c5211 100644 --- a/lib/crewai/src/crewai/experimental/skills/cache.py +++ b/lib/crewai/src/crewai/experimental/skills/cache.py @@ -9,6 +9,7 @@ from datetime import datetime, timezone import json import logging +import os from pathlib import Path import tarfile from typing import TypedDict @@ -127,12 +128,36 @@ def invalidate(self, org: str, name: str) -> bool: def _safe_extractall(tf: tarfile.TarFile, dest: Path) -> None: - """Path-traversal-safe extraction for Python < 3.12.""" + """Path-traversal-safe extraction for Python versions without tar filters. + + Validates both the member's own path and, for symlink/hardlink members, + the link target. Without the link-target check a malicious archive can + plant a symlink that escapes ``dest`` followed by a regular member written + through that link, escaping ``dest`` even though every member name resolves + inside it. This mirrors the protection that + ``tarfile.extractall(..., filter="data")`` provides when available. + """ dest_resolved = dest.resolve() for member in tf.getmembers(): member_path = (dest / member.name).resolve() if not member_path.is_relative_to(dest_resolved): raise ValueError(f"Blocked path traversal attempt: {member.name!r}") + if not (member.isfile() or member.isdir() or member.issym() or member.islnk()): + raise ValueError(f"Blocked unsupported tar member: {member.name!r}") + if member.issym() or member.islnk(): + link_target = member.linkname + if os.path.isabs(link_target): + raise ValueError( + f"Blocked link target escaping destination: " + f"{member.name!r} -> {link_target!r}" + ) + anchor = dest if member.islnk() else (dest / member.name).parent + resolved_target = (anchor / link_target).resolve() + if not resolved_target.is_relative_to(dest_resolved): + raise ValueError( + f"Blocked link target escaping destination: " + f"{member.name!r} -> {link_target!r}" + ) tf.extractall(dest) # noqa: S202 diff --git a/lib/crewai/tests/experimental/skills/test_cache.py b/lib/crewai/tests/experimental/skills/test_cache.py index 8b458bb3eb..b6601dccf0 100644 --- a/lib/crewai/tests/experimental/skills/test_cache.py +++ b/lib/crewai/tests/experimental/skills/test_cache.py @@ -8,7 +8,9 @@ import tarfile from pathlib import Path -from crewai.experimental.skills.cache import SkillCacheManager +import pytest + +from crewai.experimental.skills.cache import SkillCacheManager, _safe_extractall def _make_tar_gz(files: dict[str, str]) -> bytes: @@ -35,6 +37,15 @@ def _make_tar_gz(files: dict[str, str]) -> bytes: return out.getvalue() +def _tar_from_members(build) -> tarfile.TarFile: + """Build an in-memory tar archive via `build(tf)` and return it for reading.""" + buf = io.BytesIO() + with tarfile.open(fileobj=buf, mode="w") as tf: + build(tf) + buf.seek(0) + return tarfile.open(fileobj=buf, mode="r") + + class TestSkillCacheManager: def test_get_cached_path_missing(self, tmp_path: Path) -> None: cache = SkillCacheManager(cache_root=tmp_path) @@ -113,3 +124,85 @@ def test_store_version_none(self, tmp_path: Path) -> None: dest = cache.store("acme", "my-skill", None, archive) meta = json.loads((dest / ".crewai_meta.json").read_text()) assert meta["version"] is None + + +def test_safe_extractall_blocks_symlink_escaping_cache_destination( + tmp_path: Path, +) -> None: + """A symlink whose target escapes dest is rejected before extraction.""" + outside = tmp_path / "outside" + outside.mkdir() + dest = tmp_path / "dest" + dest.mkdir() + + def build(tf: tarfile.TarFile) -> None: + link = tarfile.TarInfo("link") + link.type = tarfile.SYMTYPE + link.linkname = str(outside) + tf.addfile(link) + payload = b"pwned" + info = tarfile.TarInfo("link/evil.txt") + info.size = len(payload) + tf.addfile(info, io.BytesIO(payload)) + + with _tar_from_members(build) as tf: + with pytest.raises(ValueError, match="escaping destination"): + _safe_extractall(tf, dest) + + assert not (outside / "evil.txt").exists() + + +def test_safe_extractall_blocks_hardlink_escaping_cache_destination( + tmp_path: Path, +) -> None: + """A hardlink whose target escapes dest is rejected.""" + dest = tmp_path / "dest" + dest.mkdir() + + def build(tf: tarfile.TarFile) -> None: + link = tarfile.TarInfo("escape") + link.type = tarfile.LNKTYPE + link.linkname = "../outside.txt" + tf.addfile(link) + + with _tar_from_members(build) as tf: + with pytest.raises(ValueError, match="escaping destination"): + _safe_extractall(tf, dest) + + +def test_safe_extractall_blocks_special_cache_tar_member(tmp_path: Path) -> None: + """Special tar members such as FIFOs are rejected.""" + dest = tmp_path / "dest" + dest.mkdir() + + def build(tf: tarfile.TarFile) -> None: + fifo = tarfile.TarInfo("pipe") + fifo.type = tarfile.FIFOTYPE + tf.addfile(fifo) + + with _tar_from_members(build) as tf: + with pytest.raises(ValueError, match="unsupported tar member"): + _safe_extractall(tf, dest) + + +def test_safe_extractall_allows_benign_cache_symlink(tmp_path: Path) -> None: + """A symlink that stays within dest is permitted.""" + dest = tmp_path / "dest" + dest.mkdir() + + def build(tf: tarfile.TarFile) -> None: + payload = b"hi" + info = tarfile.TarInfo("real.txt") + info.size = len(payload) + tf.addfile(info, io.BytesIO(payload)) + link = tarfile.TarInfo("alias.txt") + link.type = tarfile.SYMTYPE + link.linkname = "real.txt" + tf.addfile(link) + + with _tar_from_members(build) as tf: + _safe_extractall(tf, dest) + + assert (dest / "real.txt").read_bytes() == b"hi" + assert (dest / "alias.txt").is_symlink() + assert (dest / "alias.txt").readlink() == Path("real.txt")