Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 59 additions & 21 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,36 +2,74 @@ name: CI

on:
pull_request:
branches:
- main
branches: [main]
push:
branches: [main]

jobs:
build:
# Lint job: runs ruff and the pre-commit hooks on a single Ubuntu runner.
lint:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    # uv is used below to install the lint tooling.
    - name: Install uv
      uses: astral-sh/setup-uv@v2
      with:
        enable-cache: true

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        # Quoted so the version is read as a string, not a float.
        python-version: '3.11'

    - name: Install ruff and pre-commit
      run: uv pip install --system ruff pre-commit

    # Lint check first, then a formatting check that does not rewrite files.
    - name: Run ruff
      run: |
        ruff check src/ tests/
        ruff format --check src/ tests/

    - name: Run pre-commit
      # SKIP lists hook ids pre-commit should not run; the local
      # pytest-fast hook is left out of this job.
      env:
        SKIP: pytest-fast
      run: pre-commit run --all-files

test:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
python-version: ['3.9', '3.10', '3.11']
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
with:
fetch-depth: 0 # Needed for setuptools_scm

- name: Install uv
uses: astral-sh/setup-uv@v2
with:
enable-cache: true

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install setuptools wheel setuptools_scm cython numpy
- name: Install package with dev dependencies
run: uv sync --all-extras

- name: Build and install package
run: |
python setup.py build_ext --inplace
python -m pip install .
- name: Build Cython extensions
run: uv run python setup.py build_ext --inplace

- name: Test installation
run: |
python -m unittest discover -s tests
- name: Run pytest
run: uv run pytest tests/ -v --cov=wordllama --cov-report=xml --cov-report=term-missing

- name: Upload coverage to Codecov
if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.11'
uses: codecov/codecov-action@v3
with:
files: ./coverage.xml
fail_ci_if_error: false
8 changes: 5 additions & 3 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -222,10 +222,13 @@ jobs:
with:
python-version: '3.11'

- name: Install uv
uses: astral-sh/setup-uv@v2

- name: Install required Python packages
run: |
python -m pip install --upgrade pip setuptools setuptools_scm wheel cython numpy
python -m pip install build twine
pip install uv
uv pip install --system setuptools setuptools_scm wheel cython "numpy>=2" build twine

- name: Print detected version
run: |
Expand Down Expand Up @@ -262,4 +265,3 @@ jobs:
user: __token__
password: ${{ secrets.PYPI_API_TOKEN }}
packages_dir: dist/

13 changes: 9 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,15 @@ cython_debug/
#.idea/

# Ignore generated Cython files
wordllama/algorithms/*.c
wordllama/algorithms/*.cpp
wordllama/algorithms/*.html
src/wordllama/algorithms/*.c
src/wordllama/algorithms/*.cpp
src/wordllama/algorithms/*.html

# Ignore the generated version file
wordllama/_version.py
src/wordllama/_version.py

# uv
uv.lock

# ruff
.ruff_cache/
32 changes: 32 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# pre-commit configuration: generic file checks, ruff lint/format, and a
# local hook that runs the fast subset of the pytest suite.
repos:
  # General-purpose file hygiene hooks.
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v6.0.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
        # Cython sources (.pyx / .pxd) are excluded from this fixer.
        exclude: '\.pyx$|\.pxd$'
      - id: check-yaml
      - id: check-toml
      - id: check-json
      - id: check-added-large-files
        args:
          - '--maxkb=1024'
        # Model weight files are exempt from the size limit.
        exclude: 'weights/.*\.safetensors$'
      - id: check-merge-conflict

  # Ruff linter (with autofix) and formatter.
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.14.7
    hooks:
      - id: ruff
        args:
          - '--fix'
      - id: ruff-format

  # Local hook: run pytest through uv, deselecting tests marked "slow".
  - repo: local
    hooks:
      - id: pytest-fast
        name: pytest-fast
        entry: uv run pytest
        language: system
        # pytest is invoked without the list of changed files.
        pass_filenames: false
        args:
          - '-m'
          - 'not slow'
          - '--tb=short'
          - '-q'
        types:
          - python
        stages:
          - pre-commit
12 changes: 6 additions & 6 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
include LICENSE
include README.md
recursive-include wordllama *.py *.toml *.json
include wordllama/weights/*.safetensors
include wordllama/algorithms/*.pyx
include wordllama/algorithms/*.pxd
include wordllama/algorithms/*.so
include wordllama/algorithms/*.pyd
recursive-include src/wordllama *.py *.toml *.json
include src/wordllama/weights/*.safetensors
include src/wordllama/algorithms/*.pyx
include src/wordllama/algorithms/*.pxd
include src/wordllama/algorithms/*.so
include src/wordllama/algorithms/*.pyd
66 changes: 66 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# WordLlama development Makefile.
#
# Every target is a command, not a file, so all of them are declared phony.
# Recipe lines must begin with a TAB character.
.PHONY: help install install-dev build clean test test-cov lint format pre-commit-install pre-commit-run tag all

# First target in the file, so a bare `make` prints this summary.
help:
	@echo "WordLlama Development Makefile"
	@echo ""
	@echo "Available targets:"
	@echo " install - Install package and dependencies"
	@echo " install-dev - Install package with dev dependencies"
	@echo " build - Build Cython extensions"
	@echo " clean - Clean build artifacts"
	@echo " test - Run tests"
	@echo " test-cov - Run tests with coverage"
	@echo " lint - Run ruff linter"
	@echo " format - Format code with ruff"
	@echo " pre-commit-install - Install pre-commit hooks"
	@echo " pre-commit-run - Run pre-commit on all files"
	@echo " tag VERSION=X.Y.Z - Create and push a new release tag"
	@echo " all - Clean, build, lint, format, and test"

# Install the package and its dependencies.
install:
	uv sync

# Install the package together with every optional extra.
install-dev:
	uv sync --all-extras

# Compile the Cython extensions in place.
build:
	uv run python setup.py build_ext --inplace

# Remove build outputs, compiled/generated extension files and tool caches.
# Recipes run under /bin/sh, where `**` does not recurse, so the generated
# .so/.c/.cpp files are located with `find` instead of a glob.
clean:
	rm -rf build/ dist/ *.egg-info
	find src/wordllama -type f \( -name '*.so' -o -name '*.c' -o -name '*.cpp' \) -exec rm -f {} + 2>/dev/null || true
	rm -rf .pytest_cache .ruff_cache htmlcov .coverage
	find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true

# Run the test suite.
test:
	uv run pytest

# Run the test suite with HTML and terminal coverage reports.
test-cov:
	uv run pytest --cov=wordllama --cov-report=html --cov-report=term-missing

# Lint sources and tests without modifying them.
lint:
	uv run ruff check src/ tests/

# Reformat sources and tests, then apply ruff's automatic lint fixes.
format:
	uv run ruff format src/ tests/
	uv run ruff check --fix src/ tests/

# Install the git hooks defined in the pre-commit configuration.
pre-commit-install:
	uv run pre-commit install

# Run every pre-commit hook against the whole repository.
pre-commit-run:
	uv run pre-commit run --all-files

# Create an annotated tag v$(VERSION) and push it to origin.
# Usage: make tag VERSION=X.Y.Z  (fails early when VERSION is empty).
tag:
	@if [ -z "$(VERSION)" ]; then \
		echo "Error: VERSION is required. Usage: make tag VERSION=0.4.0"; \
		exit 1; \
	fi
	@echo "Creating and pushing tag v$(VERSION)..."
	git tag -a v$(VERSION) -m "Release version $(VERSION)"
	git push origin v$(VERSION)
	@echo "- Tag v$(VERSION) created and pushed"
	@echo "- GitHub Actions will build and publish the release"

# Full local pipeline; prerequisites run left to right in a serial make.
all: clean build lint format test
	@echo "- All tasks completed successfully"
21 changes: 18 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ print(f"\nBest Match: {best_candidate} (Score: {sim_key(best_candidate):.4f})")
# 2. Foundations of neural science (Score: 0.2115)
# 3. Introduction to philosophy: logic (Score: 0.1067)
# 4. Cooking delicious pasta at home (Score: 0.0045)
#
#
# Best Match: Introduction to neural networks (Score: 0.3414)
```

Expand Down Expand Up @@ -143,7 +143,7 @@ The following table presents the performance of WordLlama models compared to oth

8k documents from the `ag_news` dataset
- Single core performance (CPU), i9 12th gen, DDR4 3200
- NVIDIA A4500 (GPU)
- NVIDIA A4500 (GPU)

<p align="center">
<img src="benchmark/inference_benchmark.png" alt="Word Llama" width="80%">
Expand Down Expand Up @@ -200,7 +200,7 @@ print(f"\nBest Match: {best_candidate} (Score: {sim_key(best_candidate):.4f})")
# 2. Foundations of neural science (Score: 0.2115)
# 3. Introduction to philosophy: logic (Score: 0.1067)
# 4. Cooking delicious pasta at home (Score: 0.0045)
#
#
# Best Match: Introduction to neural networks (Score: 0.3414)
```

Expand Down Expand Up @@ -341,6 +341,21 @@ The L2 Supercat model was trained using a batch size of 512 on a single A100 GPU
- DSPy evaluators
- Retrieval-Augmented Generation (RAG) pipelines

## Development

For local development:

```bash
git clone https://github.com/dleemiller/WordLlama.git
cd WordLlama
pip install uv
uv sync --all-extras
uv run python setup.py build_ext --inplace
uv run pytest
```

See the [Makefile](Makefile) for common development commands.

## Extracting Token Embeddings

To extract token embeddings from a model, ensure you have agreed to the user agreement and logged in using the Hugging Face CLI (for LLaMA models). You can then use the following snippet:
Expand Down
9 changes: 4 additions & 5 deletions build_tools/build_wheels.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,17 @@ if [[ "$(uname)" == "Darwin" ]]; then
else
# For x86_64 builds, adjust deployment target and install llvm-openmp via Conda
export MACOSX_DEPLOYMENT_TARGET=13.0 # Matches Homebrew's libomp minimum

# Install llvm-openmp via Conda
OPENMP_URL="https://anaconda.org/conda-forge/llvm-openmp/19.1.6/download/osx-64/llvm-openmp-19.1.6-ha54dae1_0.conda"
echo "Installing llvm-openmp via Conda for x86_64..."
sudo conda create -n build $OPENMP_URL
PREFIX="$CONDA_HOME/envs/build"

# Use system Clang and point it to Conda's OpenMP paths
export CC="/usr/bin/clang"
export CXX="/usr/bin/clang++"

# Locate omp.h dynamically
OMP_INCLUDE_DIR=$(find $PREFIX -type d -name "include" | head -n 1)
if [[ -n "$OMP_INCLUDE_DIR" && -f "$OMP_INCLUDE_DIR/omp.h" ]]; then
Expand All @@ -52,7 +52,7 @@ if [[ "$(uname)" == "Darwin" ]]; then
ls -R $PREFIX # Debug: Show the structure of the Conda environment
exit 1
fi

# Set flags
export CPPFLAGS="-Xpreprocessor -fopenmp -I$OMP_INCLUDE_DIR"
export CFLAGS="-I$OMP_INCLUDE_DIR -ffp-contract=off"
Expand All @@ -74,4 +74,3 @@ python -m pip install --upgrade pip
# Install cibuildwheel and build wheels
python -m pip install --upgrade cibuildwheel
python -m cibuildwheel --output-dir wheelhouse

1 change: 0 additions & 1 deletion classifiers.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
Programming Language :: Python :: 3
License :: OSI Approved :: MIT License
Operating System :: OS Independent

Loading
Loading