From 32d2da1bfb42fdb67e777f1d2977bfc5bf67fb87 Mon Sep 17 00:00:00 2001 From: Rip&Tear <84775494+theCyberTech@users.noreply.github.com> Date: Fri, 19 Jun 2026 12:57:17 +0800 Subject: [PATCH 1/3] Fix symlink path traversal in skill archive extraction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `_safe_extractall` (the Python < 3.12 fallback used by `crewai skills` archive unpacking) validated each member's *name* against the destination but never validated symlink/hardlink *targets*. A malicious skill tarball could plant a symlink escaping the destination (e.g. `link -> /home/user/.ssh`) followed by a regular member written through it (`link/authorized_keys`), escaping `dest` even though every member name resolves inside it — the classic symlink-extraction traversal. The 3.12+ path (`extractall(..., filter="data")`) already blocks this; the fallback now mirrors it by rejecting absolute link targets and any link target that resolves outside the destination directory. Adds regression tests covering absolute and relative escaping symlinks plus benign in-tree symlinks and ordinary archives. Co-Authored-By: Claude Opus 4.8 --- .../crewai_cli/experimental/skills/main.py | 28 ++++- lib/cli/tests/skills/test_safe_extract.py | 107 ++++++++++++++++++ 2 files changed, 134 insertions(+), 1 deletion(-) create mode 100644 lib/cli/tests/skills/test_safe_extract.py diff --git a/lib/cli/src/crewai_cli/experimental/skills/main.py b/lib/cli/src/crewai_cli/experimental/skills/main.py index 81f9b3fc55..ecf94a1215 100644 --- a/lib/cli/src/crewai_cli/experimental/skills/main.py +++ b/lib/cli/src/crewai_cli/experimental/skills/main.py @@ -378,12 +378,38 @@ def _read_version(self, skill_md: Path) -> str | None: def _safe_extractall(tf: tarfile.TarFile, dest: Path) -> None: - """Path-traversal-safe extraction for Python < 3.12.""" + """Path-traversal-safe extraction for Python < 3.12. + + Validates both the member's own path and, for symlink/hardlink members, + the link target. Without the link-target check a malicious archive can + plant a symlink that escapes ``dest`` (e.g. ``link -> /home/user/.ssh``) + followed by a regular member written *through* that link + (``link/authorized_keys``), escaping ``dest`` even though every member + name resolves inside it. This mirrors the protection that + ``tarfile.extractall(..., filter="data")`` provides on Python >= 3.12. + """ dest_resolved = dest.resolve() for member in tf.getmembers(): member_path = (dest / member.name).resolve() if not member_path.is_relative_to(dest_resolved): raise ValueError(f"Blocked path traversal attempt: {member.name!r}") + if member.issym() or member.islnk(): + link_target = member.linkname + # Absolute link targets always escape the destination. + if os.path.isabs(link_target): + raise ValueError( + f"Blocked link target escaping destination: " + f"{member.name!r} -> {link_target!r}" + ) + # Hardlink names are relative to the archive root; symlink + # targets are relative to the member's own directory. + anchor = dest if member.islnk() else (dest / member.name).parent + resolved_target = (anchor / link_target).resolve() + if not resolved_target.is_relative_to(dest_resolved): + raise ValueError( + f"Blocked link target escaping destination: " + f"{member.name!r} -> {link_target!r}" + ) tf.extractall(dest) # noqa: S202 diff --git a/lib/cli/tests/skills/test_safe_extract.py b/lib/cli/tests/skills/test_safe_extract.py new file mode 100644 index 0000000000..b99f0b337b --- /dev/null +++ b/lib/cli/tests/skills/test_safe_extract.py @@ -0,0 +1,107 @@ +"""Regression tests for path-traversal-safe archive extraction. + +Guards against symlink/hardlink-based path traversal in the Python < 3.12 +extraction fallback (`_safe_extractall`). The 3.12+ path relies on +`tarfile.extractall(..., filter="data")`; the fallback must provide the same +protection by validating link targets, not just member names. +""" + +from __future__ import annotations + +import io +import tarfile +from pathlib import Path + +import pytest + +from crewai_cli.experimental.skills.main import _safe_extractall + + +def _tar_from_members(build) -> tarfile.TarFile: + """Build an in-memory tar archive via `build(tf)` and return it for reading.""" + buf = io.BytesIO() + with tarfile.open(fileobj=buf, mode="w") as tf: + build(tf) + buf.seek(0) + return tarfile.open(fileobj=buf, mode="r") + + +def test_blocks_symlink_escaping_destination(tmp_path: Path) -> None: + """A symlink whose target escapes dest, plus a file written through it, + must be rejected before anything is extracted.""" + outside = tmp_path / "outside" + outside.mkdir() + dest = tmp_path / "dest" + dest.mkdir() + + def build(tf: tarfile.TarFile) -> None: + link = tarfile.TarInfo("link") + link.type = tarfile.SYMTYPE + link.linkname = str(outside) # absolute path outside dest + tf.addfile(link) + payload = b"pwned" + info = tarfile.TarInfo("link/evil.txt") + info.size = len(payload) + tf.addfile(info, io.BytesIO(payload)) + + with _tar_from_members(build) as tf: + with pytest.raises(ValueError, match="escaping destination"): + _safe_extractall(tf, dest) + + assert not (outside / "evil.txt").exists() + + +def test_blocks_relative_symlink_escaping_destination(tmp_path: Path) -> None: + """A relative symlink (../..) that escapes dest is also rejected.""" + dest = tmp_path / "dest" + dest.mkdir() + + def build(tf: tarfile.TarFile) -> None: + link = tarfile.TarInfo("sub/link") + link.type = tarfile.SYMTYPE + link.linkname = "../../outside" # escapes dest from sub/ + tf.addfile(link) + + with _tar_from_members(build) as tf: + with pytest.raises(ValueError, match="escaping destination"): + _safe_extractall(tf, dest) + + +def test_allows_benign_relative_symlink(tmp_path: Path) -> None: + """A symlink that stays within dest is permitted.""" + dest = tmp_path / "dest" + dest.mkdir() + + def build(tf: tarfile.TarFile) -> None: + payload = b"hi" + info = tarfile.TarInfo("real.txt") + info.size = len(payload) + tf.addfile(info, io.BytesIO(payload)) + link = tarfile.TarInfo("alias.txt") + link.type = tarfile.SYMTYPE + link.linkname = "real.txt" # stays inside dest + tf.addfile(link) + + with _tar_from_members(build) as tf: + _safe_extractall(tf, dest) + + assert (dest / "real.txt").read_bytes() == b"hi" + + +def test_allows_benign_archive(tmp_path: Path) -> None: + """An ordinary archive of regular files extracts correctly.""" + dest = tmp_path / "dest" + dest.mkdir() + + def build(tf: tarfile.TarFile) -> None: + for name, body in (("SKILL.md", b"# skill"), ("scripts/run.py", b"print(1)")): + payload = body + info = tarfile.TarInfo(name) + info.size = len(payload) + tf.addfile(info, io.BytesIO(payload)) + + with _tar_from_members(build) as tf: + _safe_extractall(tf, dest) + + assert (dest / "SKILL.md").read_bytes() == b"# skill" + assert (dest / "scripts" / "run.py").read_bytes() == b"print(1)" From 580878dbef3554dd3a49a4033817fcb33d4ff023 Mon Sep 17 00:00:00 2001 From: Rip&Tear <84775494+theCyberTech@users.noreply.github.com> Date: Wed, 24 Jun 2026 13:47:06 +0800 Subject: [PATCH 2/3] Harden skill cache archive extraction --- .../crewai_cli/experimental/skills/main.py | 4 +- lib/cli/tests/skills/test_safe_extract.py | 22 ++++- .../src/crewai/experimental/skills/cache.py | 25 +++++- .../tests/experimental/skills/test_cache.py | 80 ++++++++++++++++++- 4 files changed, 125 insertions(+), 6 deletions(-) diff --git a/lib/cli/src/crewai_cli/experimental/skills/main.py b/lib/cli/src/crewai_cli/experimental/skills/main.py index ecf94a1215..b671b7733c 100644 --- a/lib/cli/src/crewai_cli/experimental/skills/main.py +++ b/lib/cli/src/crewai_cli/experimental/skills/main.py @@ -378,7 +378,7 @@ def _read_version(self, skill_md: Path) -> str | None: def _safe_extractall(tf: tarfile.TarFile, dest: Path) -> None: - """Path-traversal-safe extraction for Python < 3.12. + """Path-traversal-safe extraction for Python versions without tar filters. Validates both the member's own path and, for symlink/hardlink members, the link target. Without the link-target check a malicious archive can @@ -386,7 +386,7 @@ def _safe_extractall(tf: tarfile.TarFile, dest: Path) -> None: followed by a regular member written *through* that link (``link/authorized_keys``), escaping ``dest`` even though every member name resolves inside it. This mirrors the protection that - ``tarfile.extractall(..., filter="data")`` provides on Python >= 3.12. + ``tarfile.extractall(..., filter="data")`` provides when available. """ dest_resolved = dest.resolve() for member in tf.getmembers(): diff --git a/lib/cli/tests/skills/test_safe_extract.py b/lib/cli/tests/skills/test_safe_extract.py index b99f0b337b..9761f564dc 100644 --- a/lib/cli/tests/skills/test_safe_extract.py +++ b/lib/cli/tests/skills/test_safe_extract.py @@ -1,7 +1,7 @@ """Regression tests for path-traversal-safe archive extraction. -Guards against symlink/hardlink-based path traversal in the Python < 3.12 -extraction fallback (`_safe_extractall`). The 3.12+ path relies on +Guards against symlink/hardlink-based path traversal in the fallback used on +Python versions without tarfile extraction filters. The filtered path relies on `tarfile.extractall(..., filter="data")`; the fallback must provide the same protection by validating link targets, not just member names. """ @@ -67,6 +67,22 @@ def build(tf: tarfile.TarFile) -> None: _safe_extractall(tf, dest) +def test_blocks_hardlink_escaping_destination(tmp_path: Path) -> None: + """A hardlink whose target escapes dest is rejected.""" + dest = tmp_path / "dest" + dest.mkdir() + + def build(tf: tarfile.TarFile) -> None: + link = tarfile.TarInfo("escape") + link.type = tarfile.LNKTYPE + link.linkname = "../outside.txt" # escapes archive root + tf.addfile(link) + + with _tar_from_members(build) as tf: + with pytest.raises(ValueError, match="escaping destination"): + _safe_extractall(tf, dest) + + def test_allows_benign_relative_symlink(tmp_path: Path) -> None: """A symlink that stays within dest is permitted.""" dest = tmp_path / "dest" @@ -86,6 +102,8 @@ def build(tf: tarfile.TarFile) -> None: _safe_extractall(tf, dest) assert (dest / "real.txt").read_bytes() == b"hi" + assert (dest / "alias.txt").is_symlink() + assert (dest / "alias.txt").readlink() == Path("real.txt") def test_allows_benign_archive(tmp_path: Path) -> None: diff --git a/lib/crewai/src/crewai/experimental/skills/cache.py b/lib/crewai/src/crewai/experimental/skills/cache.py index e9752c4e8d..2d334157a5 100644 --- a/lib/crewai/src/crewai/experimental/skills/cache.py +++ b/lib/crewai/src/crewai/experimental/skills/cache.py @@ -9,6 +9,7 @@ from datetime import datetime, timezone import json import logging +import os from pathlib import Path import tarfile from typing import TypedDict @@ -127,12 +128,34 @@ def invalidate(self, org: str, name: str) -> bool: def _safe_extractall(tf: tarfile.TarFile, dest: Path) -> None: - """Path-traversal-safe extraction for Python < 3.12.""" + """Path-traversal-safe extraction for Python versions without tar filters. + + Validates both the member's own path and, for symlink/hardlink members, + the link target. Without the link-target check a malicious archive can + plant a symlink that escapes ``dest`` followed by a regular member written + through that link, escaping ``dest`` even though every member name resolves + inside it. This mirrors the protection that + ``tarfile.extractall(..., filter="data")`` provides when available. + """ dest_resolved = dest.resolve() for member in tf.getmembers(): member_path = (dest / member.name).resolve() if not member_path.is_relative_to(dest_resolved): raise ValueError(f"Blocked path traversal attempt: {member.name!r}") + if member.issym() or member.islnk(): + link_target = member.linkname + if os.path.isabs(link_target): + raise ValueError( + f"Blocked link target escaping destination: " + f"{member.name!r} -> {link_target!r}" + ) + anchor = dest if member.islnk() else (dest / member.name).parent + resolved_target = (anchor / link_target).resolve() + if not resolved_target.is_relative_to(dest_resolved): + raise ValueError( + f"Blocked link target escaping destination: " + f"{member.name!r} -> {link_target!r}" + ) tf.extractall(dest) # noqa: S202 diff --git a/lib/crewai/tests/experimental/skills/test_cache.py b/lib/crewai/tests/experimental/skills/test_cache.py index 8b458bb3eb..21b8f24732 100644 --- a/lib/crewai/tests/experimental/skills/test_cache.py +++ b/lib/crewai/tests/experimental/skills/test_cache.py @@ -8,7 +8,9 @@ import tarfile from pathlib import Path -from crewai.experimental.skills.cache import SkillCacheManager +import pytest + +from crewai.experimental.skills.cache import SkillCacheManager, _safe_extractall def _make_tar_gz(files: dict[str, str]) -> bytes: @@ -35,6 +37,15 @@ def _make_tar_gz(files: dict[str, str]) -> bytes: return out.getvalue() +def _tar_from_members(build) -> tarfile.TarFile: + """Build an in-memory tar archive via `build(tf)` and return it for reading.""" + buf = io.BytesIO() + with tarfile.open(fileobj=buf, mode="w") as tf: + build(tf) + buf.seek(0) + return tarfile.open(fileobj=buf, mode="r") + + class TestSkillCacheManager: def test_get_cached_path_missing(self, tmp_path: Path) -> None: cache = SkillCacheManager(cache_root=tmp_path) @@ -113,3 +124,70 @@ def test_store_version_none(self, tmp_path: Path) -> None: dest = cache.store("acme", "my-skill", None, archive) meta = json.loads((dest / ".crewai_meta.json").read_text()) assert meta["version"] is None + + +def test_safe_extractall_blocks_symlink_escaping_cache_destination( + tmp_path: Path, +) -> None: + """A symlink whose target escapes dest is rejected before extraction.""" + outside = tmp_path / "outside" + outside.mkdir() + dest = tmp_path / "dest" + dest.mkdir() + + def build(tf: tarfile.TarFile) -> None: + link = tarfile.TarInfo("link") + link.type = tarfile.SYMTYPE + link.linkname = str(outside) + tf.addfile(link) + payload = b"pwned" + info = tarfile.TarInfo("link/evil.txt") + info.size = len(payload) + tf.addfile(info, io.BytesIO(payload)) + + with _tar_from_members(build) as tf: + with pytest.raises(ValueError, match="escaping destination"): + _safe_extractall(tf, dest) + + assert not (outside / "evil.txt").exists() + + +def test_safe_extractall_blocks_hardlink_escaping_cache_destination( + tmp_path: Path, +) -> None: + """A hardlink whose target escapes dest is rejected.""" + dest = tmp_path / "dest" + dest.mkdir() + + def build(tf: tarfile.TarFile) -> None: + link = tarfile.TarInfo("escape") + link.type = tarfile.LNKTYPE + link.linkname = "../outside.txt" + tf.addfile(link) + + with _tar_from_members(build) as tf: + with pytest.raises(ValueError, match="escaping destination"): + _safe_extractall(tf, dest) + + +def test_safe_extractall_allows_benign_cache_symlink(tmp_path: Path) -> None: + """A symlink that stays within dest is permitted.""" + dest = tmp_path / "dest" + dest.mkdir() + + def build(tf: tarfile.TarFile) -> None: + payload = b"hi" + info = tarfile.TarInfo("real.txt") + info.size = len(payload) + tf.addfile(info, io.BytesIO(payload)) + link = tarfile.TarInfo("alias.txt") + link.type = tarfile.SYMTYPE + link.linkname = "real.txt" + tf.addfile(link) + + with _tar_from_members(build) as tf: + _safe_extractall(tf, dest) + + assert (dest / "real.txt").read_bytes() == b"hi" + assert (dest / "alias.txt").is_symlink() + assert (dest / "alias.txt").readlink() == Path("real.txt") From f2e5c31b4a67ae153754fcac37efa931388a5f18 Mon Sep 17 00:00:00 2001 From: Rip&Tear <84775494+theCyberTech@users.noreply.github.com> Date: Wed, 24 Jun 2026 13:56:22 +0800 Subject: [PATCH 3/3] Reject special skill archive members --- .../src/crewai_cli/experimental/skills/main.py | 2 ++ lib/cli/tests/skills/test_safe_extract.py | 15 +++++++++++++++ .../src/crewai/experimental/skills/cache.py | 2 ++ .../tests/experimental/skills/test_cache.py | 15 +++++++++++++++ 4 files changed, 34 insertions(+) diff --git a/lib/cli/src/crewai_cli/experimental/skills/main.py b/lib/cli/src/crewai_cli/experimental/skills/main.py index b671b7733c..612b85d820 100644 --- a/lib/cli/src/crewai_cli/experimental/skills/main.py +++ b/lib/cli/src/crewai_cli/experimental/skills/main.py @@ -393,6 +393,8 @@ def _safe_extractall(tf: tarfile.TarFile, dest: Path) -> None: member_path = (dest / member.name).resolve() if not member_path.is_relative_to(dest_resolved): raise ValueError(f"Blocked path traversal attempt: {member.name!r}") + if not (member.isfile() or member.isdir() or member.issym() or member.islnk()): + raise ValueError(f"Blocked unsupported tar member: {member.name!r}") if member.issym() or member.islnk(): link_target = member.linkname # Absolute link targets always escape the destination. diff --git a/lib/cli/tests/skills/test_safe_extract.py b/lib/cli/tests/skills/test_safe_extract.py index 9761f564dc..f1083f5fa8 100644 --- a/lib/cli/tests/skills/test_safe_extract.py +++ b/lib/cli/tests/skills/test_safe_extract.py @@ -83,6 +83,21 @@ def build(tf: tarfile.TarFile) -> None: _safe_extractall(tf, dest) +def test_blocks_special_tar_member(tmp_path: Path) -> None: + """Special tar members such as FIFOs are rejected.""" + dest = tmp_path / "dest" + dest.mkdir() + + def build(tf: tarfile.TarFile) -> None: + fifo = tarfile.TarInfo("pipe") + fifo.type = tarfile.FIFOTYPE + tf.addfile(fifo) + + with _tar_from_members(build) as tf: + with pytest.raises(ValueError, match="unsupported tar member"): + _safe_extractall(tf, dest) + + def test_allows_benign_relative_symlink(tmp_path: Path) -> None: """A symlink that stays within dest is permitted.""" dest = tmp_path / "dest" diff --git a/lib/crewai/src/crewai/experimental/skills/cache.py b/lib/crewai/src/crewai/experimental/skills/cache.py index 2d334157a5..065c2c5211 100644 --- a/lib/crewai/src/crewai/experimental/skills/cache.py +++ b/lib/crewai/src/crewai/experimental/skills/cache.py @@ -142,6 +142,8 @@ def _safe_extractall(tf: tarfile.TarFile, dest: Path) -> None: member_path = (dest / member.name).resolve() if not member_path.is_relative_to(dest_resolved): raise ValueError(f"Blocked path traversal attempt: {member.name!r}") + if not (member.isfile() or member.isdir() or member.issym() or member.islnk()): + raise ValueError(f"Blocked unsupported tar member: {member.name!r}") if member.issym() or member.islnk(): link_target = member.linkname if os.path.isabs(link_target): diff --git a/lib/crewai/tests/experimental/skills/test_cache.py b/lib/crewai/tests/experimental/skills/test_cache.py index 21b8f24732..b6601dccf0 100644 --- a/lib/crewai/tests/experimental/skills/test_cache.py +++ b/lib/crewai/tests/experimental/skills/test_cache.py @@ -170,6 +170,21 @@ def build(tf: tarfile.TarFile) -> None: _safe_extractall(tf, dest) +def test_safe_extractall_blocks_special_cache_tar_member(tmp_path: Path) -> None: + """Special tar members such as FIFOs are rejected.""" + dest = tmp_path / "dest" + dest.mkdir() + + def build(tf: tarfile.TarFile) -> None: + fifo = tarfile.TarInfo("pipe") + fifo.type = tarfile.FIFOTYPE + tf.addfile(fifo) + + with _tar_from_members(build) as tf: + with pytest.raises(ValueError, match="unsupported tar member"): + _safe_extractall(tf, dest) + + def test_safe_extractall_allows_benign_cache_symlink(tmp_path: Path) -> None: """A symlink that stays within dest is permitted.""" dest = tmp_path / "dest"