Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion lib/cli/src/crewai_cli/experimental/skills/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,12 +378,40 @@ def _read_version(self, skill_md: Path) -> str | None:


def _safe_extractall(tf: tarfile.TarFile, dest: Path) -> None:
"""Path-traversal-safe extraction for Python < 3.12."""
"""Path-traversal-safe extraction for Python versions without tar filters.

Validates both the member's own path and, for symlink/hardlink members,
the link target. Without the link-target check a malicious archive can
plant a symlink that escapes ``dest`` (e.g. ``link -> /home/user/.ssh``)
followed by a regular member written *through* that link
(``link/authorized_keys``), escaping ``dest`` even though every member
name resolves inside it. This mirrors the protection that
``tarfile.extractall(..., filter="data")`` provides when available.
"""
dest_resolved = dest.resolve()
for member in tf.getmembers():
member_path = (dest / member.name).resolve()
if not member_path.is_relative_to(dest_resolved):
raise ValueError(f"Blocked path traversal attempt: {member.name!r}")
if not (member.isfile() or member.isdir() or member.issym() or member.islnk()):
raise ValueError(f"Blocked unsupported tar member: {member.name!r}")
if member.issym() or member.islnk():
link_target = member.linkname
# Absolute link targets always escape the destination.
if os.path.isabs(link_target):
raise ValueError(
f"Blocked link target escaping destination: "
f"{member.name!r} -> {link_target!r}"
)
# Hardlink names are relative to the archive root; symlink
# targets are relative to the member's own directory.
anchor = dest if member.islnk() else (dest / member.name).parent
resolved_target = (anchor / link_target).resolve()
if not resolved_target.is_relative_to(dest_resolved):
raise ValueError(
f"Blocked link target escaping destination: "
f"{member.name!r} -> {link_target!r}"
)
tf.extractall(dest) # noqa: S202


Expand Down
140 changes: 140 additions & 0 deletions lib/cli/tests/skills/test_safe_extract.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
"""Regression tests for path-traversal-safe archive extraction.

Guards against symlink/hardlink-based path traversal in the fallback used on
Python versions without tarfile extraction filters. The filtered path relies on
`tarfile.extractall(..., filter="data")`; the fallback must provide the same
protection by validating link targets, not just member names.
"""

from __future__ import annotations

import io
import tarfile
from pathlib import Path

import pytest

from crewai_cli.experimental.skills.main import _safe_extractall


def _tar_from_members(build) -> tarfile.TarFile:
"""Build an in-memory tar archive via `build(tf)` and return it for reading."""
buf = io.BytesIO()
with tarfile.open(fileobj=buf, mode="w") as tf:
build(tf)
buf.seek(0)
return tarfile.open(fileobj=buf, mode="r")


def test_blocks_symlink_escaping_destination(tmp_path: Path) -> None:
"""A symlink whose target escapes dest, plus a file written through it,
must be rejected before anything is extracted."""
outside = tmp_path / "outside"
outside.mkdir()
dest = tmp_path / "dest"
dest.mkdir()

def build(tf: tarfile.TarFile) -> None:
link = tarfile.TarInfo("link")
link.type = tarfile.SYMTYPE
link.linkname = str(outside) # absolute path outside dest
tf.addfile(link)
payload = b"pwned"
info = tarfile.TarInfo("link/evil.txt")
info.size = len(payload)
tf.addfile(info, io.BytesIO(payload))

with _tar_from_members(build) as tf:
with pytest.raises(ValueError, match="escaping destination"):
_safe_extractall(tf, dest)

assert not (outside / "evil.txt").exists()


def test_blocks_relative_symlink_escaping_destination(tmp_path: Path) -> None:
"""A relative symlink (../..) that escapes dest is also rejected."""
dest = tmp_path / "dest"
dest.mkdir()

def build(tf: tarfile.TarFile) -> None:
link = tarfile.TarInfo("sub/link")
link.type = tarfile.SYMTYPE
link.linkname = "../../outside" # escapes dest from sub/
tf.addfile(link)

with _tar_from_members(build) as tf:
with pytest.raises(ValueError, match="escaping destination"):
_safe_extractall(tf, dest)


def test_blocks_hardlink_escaping_destination(tmp_path: Path) -> None:
"""A hardlink whose target escapes dest is rejected."""
dest = tmp_path / "dest"
dest.mkdir()

def build(tf: tarfile.TarFile) -> None:
link = tarfile.TarInfo("escape")
link.type = tarfile.LNKTYPE
link.linkname = "../outside.txt" # escapes archive root
tf.addfile(link)

with _tar_from_members(build) as tf:
with pytest.raises(ValueError, match="escaping destination"):
_safe_extractall(tf, dest)


def test_blocks_special_tar_member(tmp_path: Path) -> None:
"""Special tar members such as FIFOs are rejected."""
dest = tmp_path / "dest"
dest.mkdir()

def build(tf: tarfile.TarFile) -> None:
fifo = tarfile.TarInfo("pipe")
fifo.type = tarfile.FIFOTYPE
tf.addfile(fifo)

with _tar_from_members(build) as tf:
with pytest.raises(ValueError, match="unsupported tar member"):
_safe_extractall(tf, dest)


def test_allows_benign_relative_symlink(tmp_path: Path) -> None:
"""A symlink that stays within dest is permitted."""
dest = tmp_path / "dest"
dest.mkdir()

def build(tf: tarfile.TarFile) -> None:
payload = b"hi"
info = tarfile.TarInfo("real.txt")
info.size = len(payload)
tf.addfile(info, io.BytesIO(payload))
link = tarfile.TarInfo("alias.txt")
link.type = tarfile.SYMTYPE
link.linkname = "real.txt" # stays inside dest
tf.addfile(link)

with _tar_from_members(build) as tf:
_safe_extractall(tf, dest)

assert (dest / "real.txt").read_bytes() == b"hi"
assert (dest / "alias.txt").is_symlink()
assert (dest / "alias.txt").readlink() == Path("real.txt")


def test_allows_benign_archive(tmp_path: Path) -> None:
"""An ordinary archive of regular files extracts correctly."""
dest = tmp_path / "dest"
dest.mkdir()

def build(tf: tarfile.TarFile) -> None:
for name, body in (("SKILL.md", b"# skill"), ("scripts/run.py", b"print(1)")):
payload = body
info = tarfile.TarInfo(name)
info.size = len(payload)
tf.addfile(info, io.BytesIO(payload))

with _tar_from_members(build) as tf:
_safe_extractall(tf, dest)

assert (dest / "SKILL.md").read_bytes() == b"# skill"
assert (dest / "scripts" / "run.py").read_bytes() == b"print(1)"
27 changes: 26 additions & 1 deletion lib/crewai/src/crewai/experimental/skills/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from datetime import datetime, timezone
import json
import logging
import os
from pathlib import Path
import tarfile
from typing import TypedDict
Expand Down Expand Up @@ -127,12 +128,36 @@ def invalidate(self, org: str, name: str) -> bool:


def _safe_extractall(tf: tarfile.TarFile, dest: Path) -> None:
"""Path-traversal-safe extraction for Python < 3.12."""
"""Path-traversal-safe extraction for Python versions without tar filters.

Validates both the member's own path and, for symlink/hardlink members,
the link target. Without the link-target check a malicious archive can
plant a symlink that escapes ``dest`` followed by a regular member written
through that link, escaping ``dest`` even though every member name resolves
inside it. This mirrors the protection that
``tarfile.extractall(..., filter="data")`` provides when available.
"""
dest_resolved = dest.resolve()
for member in tf.getmembers():
member_path = (dest / member.name).resolve()
if not member_path.is_relative_to(dest_resolved):
raise ValueError(f"Blocked path traversal attempt: {member.name!r}")
if not (member.isfile() or member.isdir() or member.issym() or member.islnk()):
raise ValueError(f"Blocked unsupported tar member: {member.name!r}")
if member.issym() or member.islnk():
link_target = member.linkname
if os.path.isabs(link_target):
raise ValueError(
f"Blocked link target escaping destination: "
f"{member.name!r} -> {link_target!r}"
)
anchor = dest if member.islnk() else (dest / member.name).parent
resolved_target = (anchor / link_target).resolve()
if not resolved_target.is_relative_to(dest_resolved):
raise ValueError(
f"Blocked link target escaping destination: "
f"{member.name!r} -> {link_target!r}"
)
tf.extractall(dest) # noqa: S202


Expand Down
95 changes: 94 additions & 1 deletion lib/crewai/tests/experimental/skills/test_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
import tarfile
from pathlib import Path

from crewai.experimental.skills.cache import SkillCacheManager
import pytest

from crewai.experimental.skills.cache import SkillCacheManager, _safe_extractall


def _make_tar_gz(files: dict[str, str]) -> bytes:
Expand All @@ -35,6 +37,15 @@ def _make_tar_gz(files: dict[str, str]) -> bytes:
return out.getvalue()


def _tar_from_members(build) -> tarfile.TarFile:
"""Build an in-memory tar archive via `build(tf)` and return it for reading."""
buf = io.BytesIO()
with tarfile.open(fileobj=buf, mode="w") as tf:
build(tf)
buf.seek(0)
return tarfile.open(fileobj=buf, mode="r")


class TestSkillCacheManager:
def test_get_cached_path_missing(self, tmp_path: Path) -> None:
cache = SkillCacheManager(cache_root=tmp_path)
Expand Down Expand Up @@ -113,3 +124,85 @@ def test_store_version_none(self, tmp_path: Path) -> None:
dest = cache.store("acme", "my-skill", None, archive)
meta = json.loads((dest / ".crewai_meta.json").read_text())
assert meta["version"] is None


def test_safe_extractall_blocks_symlink_escaping_cache_destination(
tmp_path: Path,
) -> None:
"""A symlink whose target escapes dest is rejected before extraction."""
outside = tmp_path / "outside"
outside.mkdir()
dest = tmp_path / "dest"
dest.mkdir()

def build(tf: tarfile.TarFile) -> None:
link = tarfile.TarInfo("link")
link.type = tarfile.SYMTYPE
link.linkname = str(outside)
tf.addfile(link)
payload = b"pwned"
info = tarfile.TarInfo("link/evil.txt")
info.size = len(payload)
tf.addfile(info, io.BytesIO(payload))

with _tar_from_members(build) as tf:
with pytest.raises(ValueError, match="escaping destination"):
_safe_extractall(tf, dest)

assert not (outside / "evil.txt").exists()


def test_safe_extractall_blocks_hardlink_escaping_cache_destination(
tmp_path: Path,
) -> None:
"""A hardlink whose target escapes dest is rejected."""
dest = tmp_path / "dest"
dest.mkdir()

def build(tf: tarfile.TarFile) -> None:
link = tarfile.TarInfo("escape")
link.type = tarfile.LNKTYPE
link.linkname = "../outside.txt"
tf.addfile(link)

with _tar_from_members(build) as tf:
with pytest.raises(ValueError, match="escaping destination"):
_safe_extractall(tf, dest)


def test_safe_extractall_blocks_special_cache_tar_member(tmp_path: Path) -> None:
"""Special tar members such as FIFOs are rejected."""
dest = tmp_path / "dest"
dest.mkdir()

def build(tf: tarfile.TarFile) -> None:
fifo = tarfile.TarInfo("pipe")
fifo.type = tarfile.FIFOTYPE
tf.addfile(fifo)

with _tar_from_members(build) as tf:
with pytest.raises(ValueError, match="unsupported tar member"):
_safe_extractall(tf, dest)


def test_safe_extractall_allows_benign_cache_symlink(tmp_path: Path) -> None:
"""A symlink that stays within dest is permitted."""
dest = tmp_path / "dest"
dest.mkdir()

def build(tf: tarfile.TarFile) -> None:
payload = b"hi"
info = tarfile.TarInfo("real.txt")
info.size = len(payload)
tf.addfile(info, io.BytesIO(payload))
link = tarfile.TarInfo("alias.txt")
link.type = tarfile.SYMTYPE
link.linkname = "real.txt"
tf.addfile(link)

with _tar_from_members(build) as tf:
_safe_extractall(tf, dest)

assert (dest / "real.txt").read_bytes() == b"hi"
assert (dest / "alias.txt").is_symlink()
assert (dest / "alias.txt").readlink() == Path("real.txt")
Loading