MASSIVEMAGNETICS · MASSIVEMAGNETICS · Apr 2, 2026 · Apr 2, 2026 · Apr 2, 2026 · Apr 2, 2026
diff --git a/.github/workflows/smoke-tests.yml b/.github/workflows/smoke-tests.yml
@@ -0,0 +1,50 @@
+name: Smoke Tests
+
+on:
+  push:
+    branches: ["**"]
+  pull_request:
+    branches: ["**"]
+
+jobs:
+  smoke-tests:
+    name: Smoke Tests (Python ${{ matrix.python-version }})
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    strategy:
+      matrix:
+        python-version: ["3.10", "3.11"]
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: pip
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install pytest pytest-cov pytest-timeout pyyaml
+          # Install lightweight subset of requirements (skip heavy GPU libs).
+          # Deliberately no `|| true`: a failed install must fail this step
+          # instead of surfacing later as import errors in the test steps.
+          pip install numpy tqdm
+
+      - name: Run smoke tests
+        run: |
+          python -m pytest tests/test_smoke.py -v --tb=short --timeout=120
+
+      - name: Run legacy toolkit tests
+        run: |
+          # Output is trimmed with `tail`; pipefail makes the step report
+          # unittest's exit status instead of tail's, so failures fail the job.
+          set -o pipefail
+          python -m unittest test_godmode_toolkit -v 2>&1 | tail -5
+
+      - name: Run inference demo (sanity check)
+        run: |
+          python demos/demo_inference.py
+
+      - name: Run benchmark harness (sanity check)
+        run: |
+          python benchmarks/harness.py --prompts 5 --max-tokens 16
diff --git a/.gitignore b/.gitignore
@@ -38,3 +38,23 @@ victor_plugins/
 
 # Desktop shortcuts
 *.desktop
+
+# Victor CLI training runs and artifacts
+runs/
+victor_tokenizers/
+
+# Benchmark results (keep .gitkeep, ignore generated JSON)
+benchmarks/results/*.json
+
+# Python packaging / build
+dist/
+build/
+*.egg-info/
+.eggs/
+
+# Dev tooling
+.ruff_cache/
+.mypy_cache/
+.pytest_cache/
+.coverage
+htmlcov/
diff --git a/Makefile b/Makefile
@@ -0,0 +1,46 @@
+.PHONY: install install-dev test smoke lint format benchmark demo clean help
+
+PYTHON ?= python3
+PIP    ?= pip
+
+help:
+	@echo "Victor LLM – Makefile targets"
+	@echo ""
+	@echo "  install      Install runtime dependencies"
+	@echo "  install-dev  Install dev/test dependencies"
+	@echo "  test         Run all tests (smoke + toolkit)"
+	@echo "  smoke        Run only smoke tests (fast)"
+	@echo "  lint         Lint with ruff"
+	@echo "  format       Auto-format with ruff"
+	@echo "  benchmark    Run inference benchmark (5 prompts)"
+	@echo "  demo         Run the end-to-end demo"
+	@echo "  clean        Remove generated artifacts"
+
+install:
+	$(PIP) install -r requirements.txt
+
+install-dev: install
+	$(PIP) install pytest pytest-cov pytest-timeout ruff pyyaml
+
+# Run the smoke tests first (prerequisite), then the legacy toolkit suite.
+# The unittest output is not piped through `tail`: a pipeline's exit status
+# is that of its last command, so the pipe would hide test failures from make.
+test: smoke
+	$(PYTHON) -m unittest test_godmode_toolkit -v
+
+smoke:
+	$(PYTHON) -m pytest tests/test_smoke.py -v --tb=short
+
+lint:
+	$(PYTHON) -m ruff check victor_cli/ tests/ demos/ benchmarks/
+
+format:
+	$(PYTHON) -m ruff check --fix victor_cli/ tests/ demos/ benchmarks/
+
+benchmark:
+	$(PYTHON) benchmarks/harness.py --prompts 5 --max-tokens 16
+
+demo:
+	$(PYTHON) demos/demo_e2e.py
+
+clean:
+	find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
+	find . -name "*.pyc" -delete 2>/dev/null || true
+	rm -rf .pytest_cache/ dist/ build/ *.egg-info/ runs/ victor_tokenizers/
diff --git a/README.md b/README.md
@@ -121,4 +121,129 @@ For a simpler, direct demonstration of an LLM-based agent, the `VICTOR_AGI_LLM.p
 -   New functionalities and tools can be added by creating plugins in the `victor_plugins` directory (the specific path is configured in `victor_core/config.py` via `ASIConfigCore.PLUGIN_DIR`).
 -   Larger, more specialized modules or standalone conceptual systems can be developed within the `victor_modules` directory.
 -   The system uses an asynchronous architecture; familiarity with Python's `asyncio` library is beneficial for development.
+
+---
+
+## Production-Grade Usage
+
+### Install
+
+```bash
+# Core runtime
+pip install -r requirements.txt
+pip install pyyaml          # required for dataset.yaml support
+
+# Optional – PyTorch-based transformer training
+pip install torch tqdm
+
+# Install victor CLI (editable mode)
+pip install -e .
+```
+
+### Dataset Layout
+
+Place datasets under `datasets/<dataset_name>/`:
+
+```
+datasets/
+  my_dataset/
+    train.jsonl     ← required
+    valid.jsonl     ← optional
+    test.jsonl      ← optional
+    dataset.yaml    ← optional metadata
+```
+
+Each `.jsonl` line is a JSON object.  Minimum fields by task:
+
+| Task               | Fields                        |
+|--------------------|-------------------------------|
+| Language model     | `text`                        |
+| Classification     | `text`, `label`               |
+| Instruction tuning | `instruction`, `response`     |
+
+See [`datasets/README.md`](datasets/README.md) for full documentation.
+
+### Training
+
+```bash
+# Validate a dataset
+victor prepare --dataset datasets/example_dataset
+
+# Train for 5 epochs
+victor train --dataset datasets/example_dataset --epochs 5
+
+# Fine-tune from a checkpoint
+victor train --dataset datasets/my_dataset --checkpoint runs/run-20260101/
 ```
+
+Or with a config file:
+
+```bash
+victor train --dataset datasets/my_dataset --config my_config.yaml
+```
+
+### Evaluation
+
+```bash
+victor eval --dataset datasets/example_dataset --checkpoint runs/run-20260101 --split test
+```
+
+### Inference
+
+```bash
+# Single prompt
+victor predict --prompt "Tell me about neural networks"
+
+# Multiple prompts from a file (one per line)
+victor predict --prompts-file prompts.txt --max-tokens 128
+```
+
+### Benchmarks
+
+```bash
+# Quick inference benchmark (10 prompts, 64 tokens each)
+victor benchmark --prompts 10 --max-tokens 64
+
+# Full benchmark harness with comparison
+python benchmarks/harness.py --prompts 50
+python benchmarks/harness.py --mode compare --compare benchmarks/results/
+```
+
+Results are saved as timestamped JSON under `benchmarks/results/`.
+
+### Demos
+
+```bash
+python demos/demo_inference.py   # minimal inference
+python demos/demo_finetune.py    # fine-tuning on example_dataset
+python demos/demo_e2e.py         # prepare → train → eval → predict → benchmark
+```
+
+See [`demos/README.md`](demos/README.md) for full documentation.
+
+### Tests
+
+```bash
+# Fast smoke tests (45 tests, < 2s)
+make smoke
+# or
+python -m pytest tests/test_smoke.py -v
+
+# Full suite (smoke + 149 toolkit tests)
+make test
+```
+
+### Makefile
+
+```
+make install      Install runtime dependencies
+make install-dev  Install dev/test dependencies
+make test         Run all tests
+make smoke        Run only smoke tests (fast)
+make lint         Lint with ruff
+make format       Auto-format with ruff
+make benchmark    Quick benchmark (5 prompts)
+make demo         End-to-end demo
+make clean        Remove generated artifacts
+```
+
diff --git a/benchmarks/README.md b/benchmarks/README.md
@@ -0,0 +1,53 @@
+# Victor LLM Benchmarks
+
+This directory contains benchmarking infrastructure for Victor LLM.
+
+## Structure
+
+```
+benchmarks/
+  harness.py         ← standalone benchmarking harness (latency, throughput, memory)
+  results/           ← JSON results from past benchmark runs (auto-created)
+  README.md          ← this file
+```
+
+## Quick Start
+
+```bash
+# Run inference benchmark (no checkpoint needed)
+python benchmarks/harness.py
+
+# Run with a trained checkpoint
+python benchmarks/harness.py --checkpoint runs/<run_id>
+
+# Use the victor CLI
+victor benchmark --prompts 20 --max-tokens 128
+```
+
+## Metrics Captured
+
+| Metric | Description |
+|--------|-------------|
+| `latency_mean_s` | Mean per-prompt inference time (seconds) |
+| `latency_median_s` | Median per-prompt inference time |
+| `latency_min_s` / `latency_max_s` | Min / max latency |
+| `latency_stdev_s` | Standard deviation of latency |
+| `throughput_tokens_per_s` | Total tokens generated ÷ total time |
+| `memory_before_mb` | RSS before benchmark (MB) |
+| `memory_after_mb` | RSS after benchmark (MB) |
+| `memory_delta_mb` | Memory growth during benchmark |
+
+## Comparing Runs
+
+Results are stored as timestamped JSON files in `benchmarks/results/`.
+Use the compare helper:
+
+```bash
+python benchmarks/harness.py --compare benchmarks/results/
+```
+
+## Adding a Training Benchmark
+
+```bash
+python benchmarks/harness.py --mode training --dataset datasets/example_dataset --epochs 1
+```