Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/workflows/paperbench_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ jobs:
lfs: true
fetch-depth: 1

- name: Hydrate PaperBench data
run: |
git lfs fetch --include "project/paperbench/data/**"
git lfs checkout project/paperbench/data

- name: Install uv
uses: astral-sh/setup-uv@v5

Expand Down
3 changes: 3 additions & 0 deletions .lfsconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[lfs]
fetchexclude = project/paperbench/data/**
skipdownloaderrors = true
29 changes: 20 additions & 9 deletions project/paperbench/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,23 +43,34 @@ All commands in this README should be run from the [root](./) of the PaperBench

### Installation

Install PaperBench with [uv](https://docs.astral.sh/uv/)[^1]
Install PaperBench with [uv](https://docs.astral.sh/uv/):

```console
UV_GIT_LFS=1 uv sync
uv sync
```

[^1]:
If you get an LFS error here, clean your uv cache with `uv cache clean` and then re-run the command. See
[this issue comment](https://github.com/astral-sh/uv/issues/12938#issuecomment-2816186433) for details.

### Get the data

The dataset is stored using [Git-LFS](https://git-lfs.com/). Download and install LFS, then run:
The dataset is stored using [Git-LFS](https://git-lfs.com/) and is intentionally not fetched during the install
step above. Hydrate it manually (that is, replace the LFS pointer files with the real file contents) and point
PaperBench at the hydrated directory:

```console
git clone https://github.com/openai/frontier-evals.git --filter=blob:none
cd frontier-evals
git lfs fetch --include "project/paperbench/data/**"
git lfs checkout project/paperbench/data
export PAPERBENCH_DATA_DIR="$(pwd)/project/paperbench/data" # add to your shell profile
```

If you are already working from a full `frontier-evals` clone, you can run the `git lfs fetch` / `git lfs checkout`
commands from the repository root and skip setting `PAPERBENCH_DATA_DIR` (the default path resolves to
`<repo>/project/paperbench/data`). When the data is stored elsewhere, set the environment variable to the location
you hydrated.

The JudgeEval tarballs that cannot be redistributed automatically can still be created with:

```console
git lfs fetch --all
git lfs pull
PAPERBENCH_DATA_DIR=/path/to/data uv run python -m paperbench.judge.judge_eval.download_data
```

### Environment variables
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def process_benchmark(repo_url: str, commit_hash: str, output_dir: str) -> bool:


def main() -> None:
data_dir = get_paperbench_data_dir()
data_dir = get_paperbench_data_dir(require_exists=False)

for benchmark, config in REPOS.items():
print(f"\nProcessing {benchmark}...")
Expand Down
46 changes: 43 additions & 3 deletions project/paperbench/paperbench/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,25 @@
P = ParamSpec("P")
R = TypeVar("R")

# Name of the environment variable that overrides the default data directory location.
DATA_DIR_ENV_VAR = "PAPERBENCH_DATA_DIR"


class PaperbenchDataNotFoundError(FileNotFoundError):
    """Raised when we cannot locate the PaperBench data directory.

    The exception message names the path that was checked and explains how to
    fetch the Git-LFS data and point the override environment variable at it.

    Attributes:
        candidate: The path that was checked and found to be missing.
    """

    def __init__(self, candidate: Path):
        """Builds the explanatory message for the missing ``candidate`` path.

        Args:
            candidate: The directory that was expected to contain the data.
        """
        # The env-var name is interpolated from DATA_DIR_ENV_VAR so the hint in
        # the message cannot drift from the variable that is actually read.
        message = (
            "Unable to locate the PaperBench data directory.\n"
            f"Checked: {candidate}\n"
            "If you installed PaperBench from a git dependency, the dataset is not pulled "
            f"automatically. Set the {DATA_DIR_ENV_VAR} environment variable to point to a "
            "clone of `frontier-evals` where you've run `git lfs fetch --include "
            '"project/paperbench/data/**"` (and `git lfs checkout`), or hydrate the data in '
            f"another location and point {DATA_DIR_ENV_VAR} at it."
        )
        super().__init__(message)
        self.candidate = candidate


def in_ci() -> bool:
"""Checks if the tests are running in CI."""
Expand Down Expand Up @@ -72,10 +91,31 @@ def get_root() -> Path:
return path


def get_paperbench_data_dir(*, require_exists: bool = True) -> Path:
    """Returns an absolute path to the PaperBench data directory.

    The location is taken from the environment variable named by
    ``DATA_DIR_ENV_VAR`` when it is set and non-empty; otherwise it falls back
    to ``get_root().parent / "data"``.

    Args:
        require_exists: When True (default) we raise a PaperbenchDataNotFoundError if the
            resolved directory is missing. When False we return the candidate directory and
            create it if an explicit override was provided. The default location is never
            created here.

    Returns:
        The override path (user-expanded and made absolute) if one is set,
        otherwise the default data path.

    Raises:
        PaperbenchDataNotFoundError: If ``require_exists`` is True and the
            selected directory does not exist.
    """
    override = os.environ.get(DATA_DIR_ENV_VAR)
    if override:
        # resolve() makes a relative override absolute, so the returned path
        # stays valid even if the working directory changes later.
        override_path = Path(override).expanduser().resolve()
        if override_path.exists():
            return override_path
        if not require_exists:
            # Callers that populate the data themselves get a ready-to-use
            # directory at the location they explicitly asked for.
            override_path.mkdir(parents=True, exist_ok=True)
            return override_path
        raise PaperbenchDataNotFoundError(override_path)

    default_path = get_root().parent / "data"
    if require_exists and not default_path.exists():
        raise PaperbenchDataNotFoundError(default_path)
    return default_path


def build_canonical_sub_path(run_dir: Path | str, timestamp: str) -> str:
Expand Down