From 2da2c3942cb1f7046587a12c6a2755ab9f2409ac Mon Sep 17 00:00:00 2001
From: Davis Sylvester <davis@sylvesterllc.com>
Date: Sat, 11 Apr 2026 02:18:51 -0500
Subject: [PATCH] feat: production readiness improvements (P0 + P1)

- Add ILlmFactory interface to abstract LLM providers (Ollama/OpenAI/Anthropic)
- Add LLM_PROVIDER env var for provider selection with per-provider model maps
- Add token/cost tracking per LLM call with per-model and per-task breakdowns
- Add per-task cost ceiling (TASK_COST_LIMIT, default $3) that aborts runaway tasks
- Add API key redaction in Winston logs (strips sk-*, sk-ant-*, known key fields)
- Add typed error classes (RateLimitError, ContextWindowExceededError, etc.)
- Add exponential backoff with jitter for retryable LLM errors
- Add --dry-run flag to show task plan without executing
- Add --verbose and --quiet flags for log level control
- Add colored CLI output via picocolors (green/yellow/red levels, cyan phases)
- Add Phase 4 run report generation (report.md in workspace)
- Add end-of-run cost summary (tokens, calls, dollar cost)
- Make package npm-publishable (bin field, version, keywords, files)
- Add GitHub Actions CI workflow (type-check on PRs and pushes to main)
- Add CONTRIBUTING.md, SECURITY.md
- Add getting-started guide, bookmark API example PRD
- Add competitive comparison and future improvements docs
- Change OLLAMA_HOST default from private IP to localhost
- Accept raw text and stdin input in addition to file paths
---
 .github/workflows/ci.yml               |  28 ++
 CONTRIBUTING.md                        |  66 ++++
 README.md                              | 444 ++++++++++++++++++++++++-
 SECURITY.md                            |  37 +++
 bun.lock                               |   3 +
 docs/comparison.md                     | 163 +++++++++
 docs/future-improvements.md            | 311 +++++++++++++++++
 docs/getting-started.md                | 349 +++++++++++++++++++
 examples/bookmark-api-prd.md           |  76 +++++
 package.json                           |  17 +-
 src/agents/base-agent.mts              |  18 +-
 src/agents/codegen-agent.mts           |  10 +-
 src/agents/documentation-agent.mts     |  10 +-
 src/agents/planning-agent.mts          |  10 +-
 src/agents/qa-agent.mts                |   4 +-
 src/config/env.mts                     |  22 +-
 src/config/models.mts                  |  36 +-
 src/container/di.mts                   | 158 ++++++---
 src/index.mts                          | 148 ++++++++-
 src/interfaces/i-llm-factory.mts       |   6 +
 src/io/report-generator.mts            | 198 +++++++++++
 src/io/workspace.mts                   |   4 +
 src/llm/anthropic-factory.mts          |   7 +-
 src/llm/cost-tracker.mts               | 107 ++++++
 src/llm/ollama-factory.mts             |  10 +-
 src/llm/openai-factory.mts             |   7 +-
 src/llm/redact-secrets.mts             |  54 +++
 src/llm/retry-with-backoff.mts         |  57 ++++
 src/llm/stream-invoke.mts              |  28 +-
 src/orchestrator/fallback-fix-loop.mts |   7 +-
 src/orchestrator/fix-loop.mts          |  35 +-
 src/orchestrator/pipeline.mts          |  59 +++-
 src/types/agent-context.mts            |   2 +
 src/types/llm-errors.mts               |  89 +++++
 src/types/pipeline.mts                 |   5 +-
 35 files changed, 2455 insertions(+), 130 deletions(-)
 create mode 100644 .github/workflows/ci.yml
 create mode 100644 CONTRIBUTING.md
 create mode 100644 SECURITY.md
 create mode 100644 docs/comparison.md
 create mode 100644 docs/future-improvements.md
 create mode 100644 docs/getting-started.md
 create mode 100644 examples/bookmark-api-prd.md
 create mode 100644 src/interfaces/i-llm-factory.mts
 create mode 100644 src/io/report-generator.mts
 create mode 100644 src/llm/cost-tracker.mts
 create mode 100644 src/llm/redact-secrets.mts
 create mode 100644 src/llm/retry-with-backoff.mts
 create mode 100644 src/types/llm-errors.mts

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..6227cfd
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,28 @@
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  typecheck:
+    name: Type Check
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: oven-sh/setup-bun@v2
+        with:
+          bun-version: latest
+
+      - name: Install dependencies
+        run: bun install --frozen-lockfile
+
+      - name: Type check
+        run: bunx tsc --noEmit
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..f7df0ec
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,66 @@
+# Contributing
+
+Thank you for your interest in contributing to API Generator Agent.
+
+## Getting Started
+
+```bash
+git clone <repo-url>
+cd api-generator-agent
+bun install
+```
+
+## Development
+
+### Prerequisites
+
+- [Bun](https://bun.sh) v1.3+
+- [Docker](https://www.docker.com/) (for integration tests)
+- An LLM provider (Ollama, OpenAI, or Anthropic)
+
+### Type Checking
+
+```bash
+bunx tsc --noEmit
+```
+
+### Running
+
+```bash
+bun run src/index.mts sample-prd.md
+```
+
+## Code Standards
+
+- **TypeScript strict mode** with no `any` types
+- **Conventional commits**: `feat:`, `fix:`, `chore:`, `refactor:`, `test:`, `docs:`
+- **One interface per file** with `i-` prefix in `interfaces/` directories
+- **Winston logger** for all output (no `console.log`)
+- **Zod** for all external input validation
+- **Result types** for error handling (`Result<T, E>`)
+- Double quotes, trailing commas, arrow functions for callbacks
+
+## Pull Request Process
+
+1. Create a feature branch from `main`: `git checkout -b feat/your-feature`
+2. Make your changes and ensure `bunx tsc --noEmit` passes
+3. Write clear commit messages following conventional commits
+4. Open a PR against `main` with a description of what changed and why
+5. Ensure CI passes before requesting review
+
+## Architecture
+
+See the [README](README.md#project-structure) for a full project structure overview. Key patterns:
+
+- **Agents** (`src/agents/`) extend `BaseAgent` and implement `execute()`
+- **LLM factories** (`src/llm/`) implement `ILlmFactory` for provider abstraction
+- **Orchestrator** (`src/orchestrator/`) runs the pipeline phases
+- **DI container** (`src/container/di.mts`) wires everything together
+
+## Reporting Issues
+
+Open a GitHub issue with:
+- Steps to reproduce
+- Expected vs actual behavior
+- Your LLM provider and model
+- Relevant log output (redact API keys)
diff --git a/README.md b/README.md
index b1f6147..7c966f7 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,449 @@
-# api-generator-agent
+# API Generator Agent
 
-To install dependencies:
+A multi-agent code generation pipeline that takes a Product Requirements Document (PRD) and produces a complete, production-ready Elysia web API that runs on Bun. The system decomposes requirements into a dependency-ordered task graph, generates TypeScript code for each task, lints it, runs real tests against MongoDB, and iterates until all tests pass or the fix-iteration limit (`MAX_FIX_ITERATIONS`) is reached.
+
+## How It Works
+
+```
+PRD  -->  Planning Agent  -->  Task Graph  -->  [Per-Task Pipeline]  -->  Assembled API
+                                                      |
+                                          Codegen --> Lint --> Test
+                                              ^                |
+                                              |   fix loop     |
+                                              +--- errors -----+
+```
+
+**Phase 1 - Planning:** An LLM decomposes the PRD into a directed acyclic graph (DAG) of tasks with dependencies. Task types include `setup`, `model`, `repository`, `service`, `middleware`, and `endpoint`.
+
+**Phase 2 - Code Generation:** Tasks execute in topological order with configurable concurrency (default: 4 parallel). Each task enters a fix loop:
+1. **Codegen** - LLM generates TypeScript source files and tests
+2. **Lint** - ESLint auto-fixes the generated code
+3. **QA** - Runs unit tests (via `bun test`) and integration tests against a real MongoDB container
+4. If tests fail, errors feed back into the codegen agent for a fix attempt
+5. Loop repeats up to `MAX_FIX_ITERATIONS` (default: 5)
+
+**Phase 2.25 - Assembly:** Completed endpoint plugins are wired into a single `index.mts` entry file with `.use()` calls.
+
+**Phase 2.5 - Integration Testing:** Hoppscotch CLI runs integration tests against each completed task's endpoints.
+
+**Phase 3 - Documentation:** All generated code is analyzed and a Hoppscotch API collection is produced.
+
+**Phase 4 - Report:** A consolidated `report.md` is written to the workspace with the full run summary — task plan, pass/fail results, iteration counts, cost breakdown per model and per task, integration test results, generated file list, and links to all output artifacts.
+
+**Fallback Escalation:** If the primary LLM fails on a task, the system escalates through configured fallback tiers (e.g., Ollama -> OpenAI -> Anthropic) with independent iteration budgets per tier.
+
+## Prerequisites
+
+- [Bun](https://bun.sh) v1.3+
+- [Docker](https://www.docker.com/) (for MongoDB test containers)
+- At least one LLM provider configured (see [Configuration](#configuration))
+
+## Installation
 
 ```bash
+git clone <repo-url>
+cd api-generator-agent
 bun install
 ```
 
-To run:
+> **First time?** Follow the [Getting Started Guide](docs/getting-started.md) for a step-by-step walkthrough with a sample PRD.
+
+## Configuration
+
+Set environment variables directly or in a `.env` file (Bun loads `.env` automatically).
+
+### LLM Provider
+
+| Variable | Description | Default |
+|---|---|---|
+| `LLM_PROVIDER` | Primary LLM backend: `ollama`, `openai`, or `anthropic` | `ollama` |
+| `OLLAMA_HOST` | Ollama server URL | `http://localhost:11434` |
+| `OLLAMA_API_KEY` | Ollama cloud API key (uses `https://api.ollama.com` for codegen when set) | - |
+| `OPENAI_API_KEY` | OpenAI API key (required when `LLM_PROVIDER=openai`) | - |
+| `ANTHROPIC_API_KEY` | Anthropic API key (required when `LLM_PROVIDER=anthropic`) | - |
+
+### Pipeline
+
+| Variable | Description | Default |
+|---|---|---|
+| `MAX_FIX_ITERATIONS` | Max codegen/test fix loop iterations per task (1-20) | `5` |
+| `MAX_CONCURRENCY` | Parallel task execution slots (1-8) | `4` |
+| `WORKSPACE_DIR` | Output directory for run artifacts | `.workspace` |
+| `LLM_TIMEOUT_MS` | LLM response timeout in ms (10s-60min) | `1800000` (30min) |
+| `INTEGRATION_PORT` | Base port for integration test servers (each task offsets by +1) | `4100` |
+
+### Observability (optional)
+
+| Variable | Description | Default |
+|---|---|---|
+| `LANGSMITH_TRACING` | Enable LangSmith tracing | `true` |
+| `LANGSMITH_API_KEY` | LangSmith API key | - |
+| `LANGSMITH_ENDPOINT` | LangSmith endpoint URL | `https://api.smith.langchain.com` |
+| `LANGSMITH_PROJECT` | LangSmith project name | `api-generator-agent` |
+
+### Example `.env`
+
+```bash
+# Use OpenAI as primary, Anthropic as fallback
+LLM_PROVIDER=openai
+OPENAI_API_KEY=sk-...
+ANTHROPIC_API_KEY=sk-ant-...
+
+# Or use a self-hosted Ollama instance
+# LLM_PROVIDER=ollama
+# OLLAMA_HOST=http://my-gpu-server:11434
+
+MAX_FIX_ITERATIONS=5
+MAX_CONCURRENCY=4
+```
+
+## Usage
+
+```
+bun run src/index.mts <prd-file-or-text> [max-iterations] [max-tasks]
+```
+
+| Argument | Required | Description |
+|---|---|---|
+| `prd-file-or-text` | Yes | Path to a PRD file, raw PRD text, or `-` for stdin |
+| `max-iterations` | No | Override `MAX_FIX_ITERATIONS` for this run |
+| `max-tasks` | No | Only process the first N tasks from the plan |
+
+The first argument is auto-detected: if it matches an existing file, it's read as a file. Otherwise it's treated as raw PRD text. Pass `-` to read from stdin.
+
+### Examples
 
 ```bash
-bun run index.ts
+# From a file
+bun run src/index.mts sample-prd.md
+
+# Raw text
+bun run src/index.mts "Build a notes API with user auth, CRUD on notes, and pagination"
+
+# Piped from stdin
+cat my-prd.md | bun run src/index.mts -
+
+# Limit to 3 fix iterations and only the first 5 tasks
+bun run src/index.mts beautician-scheduling-prd.md 3 5
+
+# Use OpenAI for a single run
+LLM_PROVIDER=openai OPENAI_API_KEY=sk-... bun run src/index.mts sample-prd.md
+
+# Point to a remote Ollama instance
+OLLAMA_HOST=http://192.168.1.100:11434 bun run src/index.mts sample-prd.md
 ```
 
-This project was created using `bun init` in bun v1.3.9. [Bun](https://bun.com) is a fast all-in-one JavaScript runtime.
+## Writing a PRD
+
+The planning agent expects a markdown document describing your API. Include:
+
+1. **Overview** - What the API does
+2. **Data Models** - Entities with their fields and types
+3. **Endpoints** - HTTP method, path, description, auth requirements
+4. **Business Rules** - Validation, access control, domain logic
+
+### Minimal Example
+
+```markdown
+# Todo API - Product Requirements Document
+
+## Overview
+Build a simple Todo API with user authentication. Users can create,
+read, update, and delete todos. Each todo belongs to a user.
+
+## Data Models
+
+### User
+- id: UUID (auto-generated)
+- email: string (unique, required)
+- name: string (required)
+- createdAt: datetime
+
+### Todo
+- id: UUID (auto-generated)
+- title: string (required, max 200 chars)
+- description: string (optional, max 2000 chars)
+- completed: boolean (default false)
+- priority: enum (low, medium, high)
+- userId: UUID (foreign key to User)
+- createdAt: datetime
+- updatedAt: datetime
+
+## Endpoints
+
+### Auth
+- POST /api/v1/auth/register - Register a new user (email, name, password)
+- POST /api/v1/auth/login - Login and receive a JWT token
+
+### Users
+- GET /api/v1/users/me - Get current user profile (requires auth)
+
+### Todos
+- GET /api/v1/todos - List all todos for the authenticated user (pagination)
+- POST /api/v1/todos - Create a new todo
+- GET /api/v1/todos/:id - Get a specific todo
+- PUT /api/v1/todos/:id - Update a todo
+- DELETE /api/v1/todos/:id - Delete a todo
+- PATCH /api/v1/todos/:id/complete - Toggle todo completion status
+
+## Business Rules
+- Users can only see and modify their own todos
+- Pagination defaults: page=1, limit=20, max limit=100
+- Todo title is required and cannot be empty
+- Priority defaults to "medium" if not specified
+
+## Non-Functional
+- All responses in JSON
+- Standard error format: { error: string, statusCode: number }
+- Health check at GET /healthz
+```
+
+### Advanced Example
+
+The repo includes `beautician-scheduling-prd.md`, a multi-tenant appointment scheduling API with 6 entities, 20+ endpoints, discount codes, availability windows, and grace period logic. This demonstrates the agent handling complex domain rules and cross-entity dependencies.
+
+## Sample Output
+
+Running the Todo API PRD above produces output like the following:
+
+```
+$ bun run src/index.mts sample-prd.md
+
+2026-04-11T14:32:01.000Z [info] Reading PRD from: /home/user/api-generator-agent/sample-prd.md
+2026-04-11T14:32:01.001Z [info] PRD loaded (1247 chars)
+2026-04-11T14:32:01.001Z [info] Config: maxIterations=5, concurrency=4
+2026-04-11T14:32:01.001Z [info] LLM provider: openai
+
+2026-04-11T14:32:01.002Z [info] Phase 1: Planning - generating task graph from PRD
+2026-04-11T14:32:08.451Z [info] Planning complete: 8 tasks generated in 7449ms (model: gpt-5.4)
+2026-04-11T14:32:08.451Z [info]   [plan] Task: setup-foundation - "Project setup" (depends: [])
+2026-04-11T14:32:08.451Z [info]   [plan] Task: model-user - "User schema" (depends: [setup-foundation])
+2026-04-11T14:32:08.451Z [info]   [plan] Task: model-todo - "Todo schema" (depends: [setup-foundation])
+2026-04-11T14:32:08.452Z [info]   [plan] Task: middleware-auth - "JWT auth middleware" (depends: [model-user])
+2026-04-11T14:32:08.452Z [info]   [plan] Task: repo-user - "User repository" (depends: [model-user])
+2026-04-11T14:32:08.452Z [info]   [plan] Task: repo-todo - "Todo repository" (depends: [model-todo])
+2026-04-11T14:32:08.452Z [info]   [plan] Task: endpoint-auth - "Auth endpoints" (depends: [repo-user, middleware-auth])
+2026-04-11T14:32:08.452Z [info]   [plan] Task: endpoint-todos - "Todo endpoints" (depends: [repo-todo, middleware-auth])
+
+2026-04-11T14:32:08.453Z [info] Phase 2: Executing task graph
+2026-04-11T14:32:08.453Z [info]   [executor] Ready: setup-foundation
+2026-04-11T14:32:22.100Z [info]   [codegen] Success with model gpt-5.4 (4200ms)
+2026-04-11T14:32:23.500Z [info]   [qa] setup-foundation: unit PASS, integration PASS
+2026-04-11T14:32:23.501Z [info]   [executor] Ready: model-user, model-todo
+...
+2026-04-11T14:35:47.200Z [info] Task execution complete: 8 completed, 0 failed, 0 skipped (of 8)
+2026-04-11T14:35:47.200Z [info]   [result] [OK] setup-foundation - 1 iterations
+2026-04-11T14:35:47.200Z [info]   [result] [OK] model-user - 1 iterations
+2026-04-11T14:35:47.200Z [info]   [result] [OK] model-todo - 1 iterations
+2026-04-11T14:35:47.200Z [info]   [result] [OK] middleware-auth - 2 iterations
+2026-04-11T14:35:47.201Z [info]   [result] [OK] repo-user - 1 iterations
+2026-04-11T14:35:47.201Z [info]   [result] [OK] repo-todo - 1 iterations
+2026-04-11T14:35:47.201Z [info]   [result] [OK] endpoint-auth - 3 iterations
+2026-04-11T14:35:47.201Z [info]   [result] [OK] endpoint-todos - 2 iterations
+
+2026-04-11T14:35:47.202Z [info] Phase 2.25: Assembly - wiring endpoint plugins into index.mts
+2026-04-11T14:35:47.250Z [info]   [assembly] Found plugin: authRoutes in src/routes/auth.mts
+2026-04-11T14:35:47.250Z [info]   [assembly] Found plugin: todoRoutes in src/routes/todos.mts
+2026-04-11T14:35:47.251Z [info]   [assembly] Assembled index.mts with 2 plugin(s)
+
+2026-04-11T14:35:47.252Z [info] Phase 3: Generating documentation
+2026-04-11T14:35:52.100Z [info] Documentation generated successfully
+
+2026-04-11T14:35:52.500Z [info] === Pipeline Results ===
+2026-04-11T14:35:52.500Z [info] Run ID: a1b2c3d4-e5f6-7890-abcd-ef1234567890
+2026-04-11T14:35:52.500Z [info] Duration: 231498ms
+2026-04-11T14:35:52.500Z [info] Documentation: generated
+2026-04-11T14:35:52.500Z [info] Tasks: 8 completed, 0 failed, 0 skipped
+2026-04-11T14:35:52.501Z [info] Workspace: .workspace/a1b2c3d4-e5f6-7890-abcd-ef1234567890/
+```
+
+## Output Structure
+
+Each run creates a workspace directory with all artifacts:
+
+```
+.workspace/{run-id}/
+  config.json                    # Pipeline configuration for this run
+  plan.json                      # Task graph (DAG) generated from the PRD
+  execution-summary.json         # Final status of all tasks
+  integration-results.json       # Per-task integration test results
+  pipeline-result.json           # Run duration and metadata
+  report.md                      # Full human-readable run report
+  logs/
+    run.log                      # Full pipeline log (JSON lines)
+  docs/
+    assembled-index.mts          # Final Elysia app with all plugins wired
+    hoppscotch-collection.json   # Generated API collection for Hoppscotch
+  tasks/
+    setup-foundation/
+      code/
+        src/index.mts            # Elysia app, MongoDB connection, health endpoint
+        src/db.mts               # Database connection helper
+      code-linted/               # ESLint-corrected versions
+      tests/
+        setup-foundation.test.mts
+      integration/               # Hoppscotch collection + env for this task
+      iterations/
+        0/code/                  # Code snapshot from iteration 0
+        1/code/                  # Code snapshot from iteration 1 (if fix needed)
+      qa-results.json            # Unit + integration test results
+      qa-knowledge.md            # Accumulated learnings from test failures
+      status.json
+    model-user/
+      code/
+        src/types/user.mts       # TypeBox schema
+        src/types/index.mts      # Barrel export
+      ...
+    endpoint-auth/
+      code/
+        src/routes/auth.mts      # Elysia plugin with register + login routes
+      ...
+```
+
+### Key Output Files
+
+| File | Description |
+|---|---|
+| `report.md` | Full run report: task plan, results, cost breakdown (per model + per task), integration tests, generated files, output artifacts. |
+| `docs/assembled-index.mts` | The final Elysia entry file with all endpoint plugins imported and mounted via `.use()`. This is the runnable API. |
+| `docs/hoppscotch-collection.json` | Import into [Hoppscotch](https://hoppscotch.io) to test all endpoints interactively. |
+| `plan.json` | The task DAG - useful for understanding how the PRD was decomposed. |
+| `execution-summary.json` | Check which tasks passed or failed and their iteration counts. |
+| `tasks/{id}/qa-knowledge.md` | Error patterns and fixes discovered during the run - reused across iterations. |
+
+## Generated API Stack
+
+The agent produces APIs using the following stack:
+
+| Layer | Technology |
+|---|---|
+| Runtime | [Bun](https://bun.sh) |
+| Framework | [Elysia](https://elysiajs.com) |
+| Database | MongoDB (native driver) |
+| Validation | [TypeBox](https://github.com/sinclairzx81/typebox) |
+| Auth | JWT via [jose](https://github.com/panva/jose), passwords via `Bun.password` |
+| Testing | `bun test` (unit), [Hoppscotch CLI](https://docs.hoppscotch.io/documentation/clients/cli/overview) (integration) |
+
+All generated endpoints follow a standard response shape:
+
+```json
+{
+  "statusCode": 200,
+  "message": "Todos retrieved successfully",
+  "date": "2026-04-11T14:32:00.000Z",
+  "source": "/api/v1/todos",
+  "data": []
+}
+```
+
+## Task Types
+
+The planning agent decomposes PRDs into these task types, executed in dependency order:
+
+| Type | Responsibility | Example Output |
+|---|---|---|
+| `setup` | Elysia app scaffold, MongoDB connection, health endpoint, error handler | `src/index.mts`, `src/db.mts` |
+| `model` | TypeBox schemas with barrel exports | `src/types/user.mts`, `src/types/index.mts` |
+| `repository` | Data access layer (MongoDB queries, Result types) | `src/repositories/user-repository.mts` |
+| `service` | Business logic orchestration | `src/services/todo-service.mts` |
+| `middleware` | Auth guards, validators (Elysia `.guard()` + `.resolve()`) | `src/middleware/auth.mts` |
+| `endpoint` | Elysia route plugins (mounted via `.use()` in assembly) | `src/routes/todos.mts` |
+
+## Knowledge Bases
+
+The agent accumulates learnings from test failures in `docs/knowledge-bases/`. Each task has its own knowledge file that persists across runs. When a task encounters a test error and fixes it, the pattern is recorded so future runs avoid the same mistake.
+
+Example entry from `endpoint-auth-knowledge.md`:
+
+```markdown
+## Error: auth middleware must use .as('plugin')
+- **Condition**: Guard/resolve not applied to routes in consuming endpoint
+- **Resolution**: Add .as('plugin') after .guard().resolve() chain
+- **Status**: Resolved
+```
+
+## Fallback Escalation
+
+When configured with multiple API keys, the agent escalates through LLM tiers if the primary model fails on a task:
+
+```
+Primary (LLM_PROVIDER) --> Fallback Tier 1 --> Fallback Tier 2
+     5 iterations            16 iterations      16 iterations
+```
+
+For example, with `LLM_PROVIDER=ollama`, `OPENAI_API_KEY`, and `ANTHROPIC_API_KEY` all set:
+
+1. **Primary**: Ollama (local, 5 iterations)
+2. **Fallback 1**: OpenAI GPT-5.4 (16 iterations)
+3. **Fallback 2**: Anthropic Claude Sonnet 4.6 (16 iterations)
+
+The primary provider is automatically excluded from fallback tiers to avoid redundancy.
+
+## Example PRDs
+
+| PRD | Complexity | Entities | Endpoints | Description |
+|---|---|---|---|---|
+| [`examples/bookmark-api-prd.md`](examples/bookmark-api-prd.md) | Medium | 3 (User, Folder, Bookmark) | 14 | Bookmark manager with folders, tags, and search |
+| [`sample-prd.md`](sample-prd.md) | Simple | 2 (User, Todo) | 9 | Classic todo app with priorities and pagination |
+| [`beautician-scheduling-prd.md`](beautician-scheduling-prd.md) | Complex | 6 | 20+ | Multi-tenant appointment scheduling with grace periods and discount codes |
+
+Start with the bookmark API if this is your first time:
+
+```bash
+bun run src/index.mts examples/bookmark-api-prd.md
+```
+
+## Plan Caching
+
+The planning phase is cached by PRD content hash. If you run the same PRD twice, the second run reuses the task graph from the first without calling the LLM. Cache files are stored in `.workspace/.plan-cache/`.
+
+To force re-planning, delete the cache directory:
+
+```bash
+rm -rf .workspace/.plan-cache/
+```
+
+## Project Structure
+
+```
+src/
+  index.mts                     # CLI entry point
+  config/
+    env.mts                     # Zod-validated environment schema
+    models.mts                  # Per-provider model name mappings
+    fallback-tiers.mts          # Fallback tier interface
+  interfaces/
+    i-llm-factory.mts           # Common LLM factory interface
+  llm/
+    ollama-factory.mts          # Ollama ChatModel factory
+    openai-factory.mts          # OpenAI ChatModel factory
+    anthropic-factory.mts       # Anthropic ChatModel factory
+    stream-invoke.mts           # Streaming LLM invocation
+    with-timeout.mts            # LLM timeout wrapper
+  agents/
+    base-agent.mts              # Abstract agent with model chain + fallback
+    planning-agent.mts          # PRD -> task graph
+    codegen-agent.mts           # Task -> TypeScript code + tests
+    qa-agent.mts                # Test runner (bun test + Hoppscotch CLI)
+    eslint-agent.mts            # Lint auto-fixer
+    documentation-agent.mts     # Code -> Hoppscotch collection
+  orchestrator/
+    pipeline.mts                # Main pipeline (phases 1-4)
+    fix-loop.mts                # Single-task codegen/lint/test loop
+    fallback-fix-loop.mts       # Fix loop with LLM tier escalation
+  graph/
+    parallel-executor.mts       # Topological DAG executor with concurrency
+    task-graph.mts              # DAG validation
+  container/
+    di.mts                      # Dependency injection / factory wiring
+  io/
+    workspace.mts               # Workspace directory management
+    file-protocol.mts           # JSON/code file read/write helpers
+  prompts/                      # System + user prompts for each agent
+  types/                        # TypeScript interfaces (task, pipeline, result)
+  validators/                   # Import/export validation for generated code
+```
+
+## License
+
+ISC
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 0000000..1d3e22d
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,37 @@
+# Security Policy
+
+## Reporting a Vulnerability
+
+If you discover a security vulnerability, please report it responsibly:
+
+1. **Do not** open a public GitHub issue
+2. Email the maintainers with a description of the vulnerability
+3. Include steps to reproduce if possible
+4. Allow reasonable time for a fix before public disclosure
+
+## Security Model
+
+### API Key Handling
+
+- API keys are loaded from environment variables or `.env` files
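+- Keys are **redacted from log output** on a best-effort basis — Winston uses a redaction format that strips known key patterns (`sk-*`, `sk-ant-*`, `key-*`) and known field names (`apiKey`, `OPENAI_API_KEY`, etc.); secrets that match neither a known pattern nor a known field name are not caught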
+- Keys are never written to workspace output files
+- `.env` files are excluded from git via `.gitignore`
+
+### Generated Code Execution
+
+- The QA agent runs generated code and tests in a subprocess
+- MongoDB test containers run on an isolated port (default: 27018)
+- Generated code has access to the local filesystem within the workspace directory
+
+### Dependencies
+
+- Dependencies are pinned via `bun.lock`
+- No post-install scripts execute arbitrary code (except `isolated-vm` from `@hoppscotch/cli`, which requires native compilation)
+
+## Scope
+
+This agent generates code that runs locally. It does not:
+- Expose any network services beyond test servers on localhost
+- Send generated code to external services (except the configured LLM providers for generation and, when `LANGSMITH_TRACING` is enabled, LangSmith for tracing)
+- Store credentials in generated output
diff --git a/bun.lock b/bun.lock
index 8928f1a..c37af4d 100644
--- a/bun.lock
+++ b/bun.lock
@@ -15,6 +15,7 @@
         "langchain": "^1.3.0",
         "langsmith": "^0.5.16",
         "ollama": "^0.6.3",
+        "picocolors": "^1.1.1",
         "winston": "^3.19.0",
         "zod": "^4.3.6",
         "zod-to-json-schema": "^3.25.2",
@@ -365,6 +366,8 @@
 
     "path-key": ["path-key@3.1.1", "", {}, "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q=="],
 
+    "picocolors": ["picocolors@1.1.1", "", {}, "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA=="],
+
     "playwright": ["playwright@1.59.1", "", { "dependencies": { "playwright-core": "1.59.1" }, "optionalDependencies": { "fsevents": "2.3.2" }, "bin": { "playwright": "cli.js" } }, "sha512-C8oWjPR3F81yljW9o5OxcWzfh6avkVwDD2VYdwIGqTkl+OGFISgypqzfu7dOe4QNLL2aqcWBmI3PMtLIK233lw=="],
 
     "playwright-core": ["playwright-core@1.59.1", "", { "bin": { "playwright-core": "cli.js" } }, "sha512-HBV/RJg81z5BiiZ9yPzIiClYV/QMsDCKUyogwH9p3MCP6IYjUFu/MActgYAvK0oWyV9NlwM3GLBjADyWgydVyg=="],
diff --git a/docs/comparison.md b/docs/comparison.md
new file mode 100644
index 0000000..b8a489c
--- /dev/null
+++ b/docs/comparison.md
@@ -0,0 +1,163 @@
+# Competitive Comparison: API Generator Agent vs Top AI Coding Agents
+
+A feature-by-feature comparison of API Generator Agent against the leading open-source AI code generation and coding agent tools as of April 2026.
+
+## Agents Compared
+
+| Agent | Primary Use Case | Distribution | Stars |
+|---|---|---|---|
+| **API Generator Agent** | PRD-to-API pipeline (Elysia/Bun) | npm/bun | - |
+| **Aider** | CLI pair programmer | pip (PyPI) | 30k+ |
+| **GPT Engineer** | Natural language to codebase | pip (PyPI) | 50k+ |
+| **OpenHands** | Autonomous software engineer | Docker / pip | 50k+ |
+| **SWE-agent** | Issue-to-PR automation | pip (PyPI) | 15k+ |
+| **Bolt.new** | Browser-based full-stack builder | pnpm (web app) | 15k+ |
+| **Cline** | VS Code autonomous agent | VS Code Marketplace | 30k+ |
+
+## Feature Matrix
+
+### Core Pipeline
+
+| Feature | API Gen Agent | Aider | GPT Engineer | OpenHands | SWE-agent | Bolt.new | Cline |
+|---|---|---|---|---|---|---|---|
+| PRD-to-code pipeline | **Yes** | No | Partial | No | No | Partial | No |
+| Task graph decomposition | **Yes (DAG)** | No | Sequential | No | No | Sequential | No |
+| Parallel task execution | **Yes (4 slots)** | No | No | No | No | No | No |
+| Plan caching | **Yes (hash)** | No | No | No | No | No | No |
+| Assembly phase | **Yes** | No | No | No | No | Yes | No |
+| Dry-run mode | **Yes** | No | No | No | No | No | No |
+
+API Generator Agent is the only tool that decomposes a PRD into a dependency-ordered DAG and executes tasks in parallel with configurable concurrency. Other tools either work file-by-file (Aider, Cline) or use sequential generation (GPT Engineer, Bolt.new).
+
+### LLM Provider Support
+
+| Feature | API Gen Agent | Aider | GPT Engineer | OpenHands | SWE-agent | Bolt.new | Cline |
+|---|---|---|---|---|---|---|---|
+| OpenAI | Yes | Yes | Yes | Yes | Yes | Yes | Yes |
+| Anthropic | Yes | Yes | Yes | Yes | Yes | Yes | Yes |
+| Ollama (local) | **Yes (primary)** | Yes | No | Yes | No | No | No |
+| Multi-tier fallback | **Yes (3 tiers)** | No | No | No | Yes | No | No |
+| Cloud Ollama | **Yes** | No | No | No | No | No | No |
+| Custom model chains | **Yes (per role)** | Yes | No | No | Yes | No | No |
+
+API Generator Agent and SWE-agent are the only tools with structured multi-tier fallback escalation. API Generator Agent uniquely assigns different models to different agent roles (planning vs codegen vs QA) and supports Ollama cloud as a separate codegen tier.
+
+### Testing and Validation
+
+| Feature | API Gen Agent | Aider | GPT Engineer | OpenHands | SWE-agent | Bolt.new | Cline |
+|---|---|---|---|---|---|---|---|
+| Auto-generates tests | **Yes** | No | Yes | No | No | No | No |
+| Runs tests in loop | **Yes** | Yes | No | No | No | No | Yes |
+| Real database testing | **Yes (MongoDB)** | No | No | No | No | No | No |
+| Integration tests | **Yes (Hoppscotch)** | No | No | No | No | No | Partial |
+| Auto-lint + fix | **Yes** | Yes | No | No | No | No | Yes |
+| Knowledge accumulation | **Yes** | No | No | No | No | No | No |
+| Fix loop with iteration cap | **Yes** | Partial | No | No | No | No | Yes |
+
+API Generator Agent is the only tool that generates code, writes tests, spins up a real MongoDB container, runs the tests, and feeds failures back to the LLM in a loop. Aider runs user-specified test commands but does not generate tests. The knowledge base system that accumulates error patterns across runs is unique to API Generator Agent.
+
+### Error Handling and Resilience
+
+| Feature | API Gen Agent | Aider | GPT Engineer | OpenHands | SWE-agent | Bolt.new | Cline |
+|---|---|---|---|---|---|---|---|
+| Typed error classes | **Yes** | No | No | Yes | Partial | No | No |
+| Exponential backoff | **Yes** | Partial | No | Yes | Yes | No | No |
+| Rate limit handling | **Yes** | Yes | No | Yes | Yes | No | No |
+| Cost ceiling per task | **Yes** | No | No | Yes | **Yes** | No | No |
+| Token/cost tracking | **Yes** | Yes | No | Yes | Yes | No | **Yes** |
+| Max iteration cap | **Yes** | No | No | No | Yes | No | **Yes (8)** |
+| API key redaction | **Yes** | Partial | No | Partial | **Yes** | No | **Yes** |
+
+API Generator Agent now matches or exceeds the resilience features of SWE-agent, with typed error classification (`RateLimitError`, `ContextWindowExceededError`, `AuthenticationError`, `ModelUnavailableError`, `CostLimitExceededError`), exponential backoff with jitter, per-task cost ceilings, and Winston-level key redaction.
+
+### CLI and UX
+
+| Feature | API Gen Agent | Aider | GPT Engineer | OpenHands | SWE-agent | Bolt.new | Cline |
+|---|---|---|---|---|---|---|---|
+| Colored terminal output | **Yes** | **Yes** | No | No | No | N/A (web) | N/A (IDE) |
+| Streaming LLM output | **Yes** | **Yes** | No | Yes | No | Yes | Yes |
+| Progress spinners | **Yes** | No | No | No | No | Yes | Yes |
+| `--dry-run` | **Yes** | No | No | No | No | No | No |
+| `--verbose` / `--quiet` | **Yes** | **Yes** | No | Yes | No | No | No |
+| File input | **Yes** | Yes | Yes | Yes | Yes | N/A | N/A |
+| Stdin piping | **Yes** | Yes | No | No | No | N/A | N/A |
+| Raw text input | **Yes** | Yes | No | No | No | Yes | Yes |
+| Interactive REPL | No | **Yes** | No | Yes | No | **Yes** | **Yes** |
+| Voice input | No | **Yes** | No | No | No | No | No |
+| Browser preview | No | No | No | No | No | **Yes** | No |
+
+Aider leads in CLI UX with REPL, vim bindings, voice input, and extensive color customization. API Generator Agent focuses on single-shot pipeline execution but offers flexible input modes (file, text, stdin) and dry-run for plan inspection.
+
+### Observability
+
+| Feature | API Gen Agent | Aider | GPT Engineer | OpenHands | SWE-agent | Bolt.new | Cline |
+|---|---|---|---|---|---|---|---|
+| Structured JSON logging | **Yes** | No | No | Yes | No | No | No |
+| LangSmith tracing | **Yes** | No | No | No | No | No | No |
+| OpenTelemetry export | No | No | No | **Yes** | No | No | No |
+| Run artifacts/workspace | **Yes** | Partial | No | Yes | **Yes** | No | Yes |
+| Cost summary at end | **Yes** | Yes | No | Yes | Yes | No | **Yes** |
+| Per-task cost breakdown | **Yes** | No | No | No | **Yes** | No | No |
+
+API Generator Agent provides the most comprehensive run artifacts: per-task code snapshots, QA results, knowledge files, and iteration logs — all in a structured workspace directory. OpenHands leads in observability infrastructure with full OpenTelemetry support.
+
+### Distribution and Packaging
+
+| Feature | API Gen Agent | Aider | GPT Engineer | OpenHands | SWE-agent | Bolt.new | Cline |
+|---|---|---|---|---|---|---|---|
+| npm / bunx install | **Yes** | No | No | No | No | Yes | Yes |
+| pip install | No | **Yes** | **Yes** | **Yes** | **Yes** | No | No |
+| Docker image | No | Yes | Yes | **Yes** | No | No | No |
+| VS Code extension | No | No | No | No | No | No | **Yes** |
+| Standalone binary | No | No | No | No | No | No | No |
+| GitHub Actions CI | **Yes** | **Yes** | **Yes** | **Yes** | **Yes** | Yes | **Yes** |
+
+### Security
+
+| Feature | API Gen Agent | Aider | GPT Engineer | OpenHands | SWE-agent | Bolt.new | Cline |
+|---|---|---|---|---|---|---|---|
+| Key redaction in logs | **Yes** | Partial | No | Partial | **Yes** | No | Yes |
+| Sandboxed execution | No | No | No | **Yes (Docker)** | No | **Yes (WASM)** | No |
+| Permission gates | No | No | No | No | No | No | **Yes** |
+| SECURITY.md | **Yes** | No | No | **Yes** | No | No | **Yes** |
+| SSL verification | N/A | **Yes** | No | Yes | No | N/A | N/A |
+
+OpenHands leads in security with Docker sandboxing. Cline leads in permission control with human-in-the-loop gates for every file write. API Generator Agent provides key redaction and security documentation.
+
+## Where API Generator Agent Leads
+
+1. **PRD-to-API pipeline** — No other tool takes a product requirements document and produces a fully assembled, tested API with database integration, auth, and documentation.
+
+2. **Real integration testing** — The only tool that spins up a MongoDB container, runs generated tests against it, and uses test failures to drive code fixes.
+
+3. **Knowledge accumulation** — Error patterns and fix strategies persist across runs in per-task knowledge bases, making subsequent runs more reliable.
+
+4. **Parallel DAG execution** — Tasks execute in topological order with configurable concurrency. Other tools process sequentially.
+
+5. **Multi-tier model escalation** — A three-tier fallback chain with an independent iteration budget per tier; different models can be assigned to different agent roles.
+
+6. **Plan caching** — Re-running the same PRD skips the planning phase entirely.
+
+7. **Hoppscotch integration** — Auto-generates an importable API collection for interactive endpoint testing.
+
+## Where API Generator Agent Can Improve
+
+| Gap | Leader | Priority |
+|---|---|---|
+| Interactive REPL mode | Aider | P2 |
+| Docker sandboxing | OpenHands | P2 |
+| Permission gates before file writes | Cline | P2 |
+| OpenTelemetry export | OpenHands | P2 |
+| Checkpoint/rollback system | Cline | P2 |
+| Layered YAML config (project + global) | Aider | P2 |
+| Voice input | Aider | P3 |
+| Browser live preview | Bolt.new | P3 |
+| VS Code extension | Cline | P3 |
+| `.agentignore` file | Aider | P2 |
+| Built-in update checker | Aider | P2 |
+
+## Summary
+
+API Generator Agent occupies a unique niche: it is the only tool purpose-built to convert PRDs into production-ready APIs with real database testing. General-purpose agents like Aider and Cline are more flexible but lack the structured pipeline, parallel execution, and domain-specific testing that make API Generator Agent reliable for API generation.
+
+The P0 and P1 improvements (cost tracking, typed errors, backoff, colored output, dry-run, CI) bring the agent's infrastructure quality in line with mature tools like SWE-agent and Aider, while its core pipeline capabilities remain differentiated.
diff --git a/docs/future-improvements.md b/docs/future-improvements.md
new file mode 100644
index 0000000..2f2a96d
--- /dev/null
+++ b/docs/future-improvements.md
@@ -0,0 +1,311 @@
+# Future Improvements
+
+Prioritized roadmap of features beyond the P0/P1 work already implemented. Items are grouped by priority tier and ordered by impact within each tier.
+
+## P2 — Competitive Parity
+
+These features close gaps with mature agents like Aider, SWE-agent, OpenHands, and Cline. Each one has a clear leader to reference for implementation patterns.
+
+### Docker Image for Zero-Install Distribution
+
+**Reference:** OpenHands publishes to `ghcr.io` with BuildKit caching.
+
+Publish a Docker image so users can run the agent without installing Bun or any local dependencies. The image should include Bun, the agent source, and Docker-in-Docker support for MongoDB test containers.
+
+```bash
+docker run --rm -v "$(pwd)":/work ghcr.io/your-org/api-generator-agent /work/my-prd.md
+```
+
+**Implementation notes:**
+- Multi-stage Dockerfile: builder (install deps) -> runner (copy node_modules + src)
+- Mount the working directory for PRD input and workspace output
+- Docker-in-Docker or sibling container pattern for MongoDB test containers
+- Publish on GitHub release via CI workflow
+
+---
+
+### Layered Configuration System (YAML)
+
+**Reference:** Aider supports `.aider.conf.yml` at project root, home directory, and CWD with CLI flag overrides.
+
+Replace env-var-only configuration with a layered system:
+
+```
+CLI flags  >  .api-gen-agent.yml (project)  >  ~/.api-gen-agent/config.yml (global)  >  .env  >  defaults
+```
+
+Add an `init` command that generates a starter config with comments:
+
+```bash
+api-generator-agent init
+# Creates .api-gen-agent.yml with all options documented
+```
+
+**Sample config:**
+
+```yaml
+provider: openai
+models:
+  planning: gpt-5.4
+  codegen: gpt-5.4
+  documentation: gpt-5.4
+  qa: gpt-5.4
+pipeline:
+  maxFixIterations: 5
+  maxConcurrency: 4
+  taskCostLimit: 3.00
+  integrationPort: 4100
+```
+
+**Implementation notes:**
+- Use `js-yaml` or `yaml` package to parse
+- Load and merge configs in precedence order before passing to `loadEnv()`
+- Every env var should have a YAML equivalent
+- Validate merged config with the existing Zod schema
+
+---
+
+### Sandboxed Code Execution
+
+**Reference:** OpenHands runs generated code in Docker containers. Bolt.new uses WebContainers (WASM sandbox).
+
+Run generated code and tests inside a Docker container instead of directly on the host. This prevents generated code from accessing the host filesystem, network, or processes.
+
+**Implementation notes:**
+- Create a lightweight Docker image with Bun + MongoDB client
+- Mount only the task's `code/` and `tests/` directories
+- Run `bun test` inside the container
+- Capture stdout/stderr and exit code
+- Fall back to host execution if Docker is unavailable (with a warning)
+
+---
+
+### Checkpoint and Rollback System
+
+**Reference:** Cline lets users diff against any previous checkpoint and roll back to it.
+
+Each iteration already saves code snapshots in `iterations/{n}/code/`. Add a CLI command to restore a specific iteration:
+
+```bash
+api-generator-agent restore <run-id> <task-id> <iteration>
+```
+
+**Implementation notes:**
+- Copy files from `iterations/{n}/code/` back to `tasks/{task-id}/code/`
+- Show a diff before restoring
+- Support `--list` to show available checkpoints with pass/fail status
+
+---
+
+### OpenTelemetry Tracing Export
+
+**Reference:** OpenHands has full OTEL instrumentation with spans for `agent.step`, `tool.execute`, and `llm.completion`. Supports Laminar, Honeycomb, Jaeger, Datadog, and any OTLP backend.
+
+Add optional OTEL export alongside existing LangSmith support. This lets users send traces to their existing observability platform.
+
+**Implementation notes:**
+- Add `@opentelemetry/api` and `@opentelemetry/sdk-trace-node`
+- Create spans for: pipeline phases, task execution, LLM calls, test runs
+- Enable via `OTEL_EXPORTER_OTLP_ENDPOINT` env var
+- When not set, tracing is a no-op (zero overhead)
+- Attach cost and token metadata to LLM spans
+
+---
+
+### `.agentignore` File
+
+**Reference:** Aider supports `.aiderignore` to exclude files from context.
+
+When gathering dependency code for a task, respect an `.agentignore` file at the project root. This prevents the agent from reading large generated files, vendored code, or sensitive files.
+
+```gitignore
+# .agentignore
+node_modules/
+.workspace/
+*.min.js
+vendor/
+```
+
+**Implementation notes:**
+- Use the `ignore` npm package (already an indirect dependency via eslint)
+- Load `.agentignore` once at pipeline start
+- Filter file lists through the ignore matcher before passing to agents
+
+---
+
+### Built-in Update Checker
+
+**Reference:** Aider checks for new versions on startup with `--check-update` (on by default, `--no-check-update` to disable).
+
+On startup, check npm registry for the latest published version. If newer than the running version, print a one-line notice.
+
+```
+[info] Update available: 0.1.0 -> 0.2.0  (run: bun update -g api-generator-agent)
+```
+
+**Implementation notes:**
+- Fetch `https://registry.npmjs.org/api-generator-agent/latest` with a 2-second timeout
+- Compare against `version` from package.json
+- Cache the check result for 24 hours in `~/.api-gen-agent/last-update-check`
+- Skip when `--no-check-update` flag is passed or `NO_UPDATE_CHECK=1` is set
+
+---
+
+### CHANGELOG.md with Auto-Generation
+
+**Reference:** Aider maintains a changelog across 93+ releases. SWE-agent uses conventional commits for auto-generation.
+
+Maintain a `CHANGELOG.md` and auto-generate it from conventional commits on release.
+
+**Implementation notes:**
+- Add `conventional-changelog-cli` as a dev dependency
+- Add a `release` script: `bunx conventional-changelog -p angular -i CHANGELOG.md -s`
+- Run as part of the GitHub Actions release workflow
+- Tag releases with semver: `git tag v0.2.0`
+
+---
+
+### Permission Gates Before File Writes
+
+**Reference:** Cline requires human approval for every file write, terminal command, and browser action.
+
+Add an optional `--confirm` flag that prompts the user before writing generated code to the workspace. Useful when users want to review the plan and first iteration before committing to a full run.
+
+**Implementation notes:**
+- In `--confirm` mode, pause after planning and display the task graph
+- Pause again after first code generation to show a file list
+- Use `readline` or Bun's stdin for yes/no prompts
+- Default off (non-interactive pipeline mode is the primary use case)
+
+---
+
+## P3 — Differentiation
+
+These features would make the agent stand out from competitors but require significant effort. Implement after P2 is complete and the agent has real user adoption.
+
+### Interactive REPL Mode
+
+**Reference:** Aider has a full REPL with tab completion, vim bindings, multi-line input, and `/commands`.
+
+Add an interactive mode where users can iteratively refine the PRD, re-run specific tasks, inspect generated code, and modify configuration without restarting.
+
+```bash
+api-generator-agent --interactive
+> load beautician-scheduling-prd.md
+> plan
+> run setup-foundation
+> show endpoint-auth
+> fix endpoint-auth "auth middleware should use .as('plugin')"
+> run-all
+```
+
+**Implementation notes:**
+- Use `readline` (Node's built-in module, also available under Bun) for line editing and history
+- Maintain pipeline state across commands
+- Support `/plan`, `/run <task>`, `/show <task>`, `/fix <task> <hint>`, `/status`, `/cost`
+- This is a large feature — consider it a standalone milestone
+
+---
+
+### Browser Live Preview
+
+**Reference:** Bolt.new runs code in WebContainers with live hot-reload preview.
+
+After assembly, automatically start the generated API and open a browser with a Swagger/Hoppscotch-like UI showing all endpoints.
+
+**Implementation notes:**
+- Start the assembled Elysia app on a random port
+- Generate a minimal HTML page with endpoint list and request forms
+- Open with `Bun.spawn(["open", url])` or platform equivalent
+- Kill the server on Ctrl+C
+
+---
+
+### VS Code Extension
+
+**Reference:** Cline integrates deeply into VS Code with diff views, permission prompts, terminal streaming, and checkpoint timelines.
+
+Wrap the CLI in a VS Code extension that shows:
+- Task graph visualization in a panel
+- Real-time log streaming
+- Per-task code diffs
+- One-click re-run of failed tasks
+
+**Implementation notes:**
+- Use the VS Code Extension API with a WebView panel
+- Communicate with the CLI process via JSON-RPC or stdout parsing
+- This is a major project — consider as a v2.0 feature
+
+---
+
+### Voice Input
+
+**Reference:** Aider supports voice input for PRD dictation and commands.
+
+Allow users to dictate PRDs or commands via microphone using Whisper or a similar STT model.
+
+**Implementation notes:**
+- Use OpenAI Whisper API or a local Whisper model
+- Pipe transcribed text to the existing PRD input path
+- Gated behind `--voice` flag
+
+---
+
+### Multi-Framework Support
+
+Currently the agent only generates Elysia/Bun APIs. Add support for generating APIs in other frameworks:
+
+- **Express/Node** — the most common target for hiring/teams
+- **Fastify/Node** — for performance-focused teams
+- **Hono/Bun** — Elysia alternative gaining traction
+
+**Implementation notes:**
+- Add a `--framework` flag and `FRAMEWORK` env var
+- Per-framework prompt templates in `src/prompts/frameworks/`
+- Per-framework test harness in the QA agent
+- Setup-foundation task generates framework-specific scaffold
+
+---
+
+### Benchmark Harness
+
+**Reference:** GPT Engineer and SWE-agent ship `bench` binaries for evaluating agent quality against standard datasets.
+
+Build a benchmark suite that measures:
+- Pass rate across a set of PRDs (todo, bookmarks, scheduling, etc.)
+- Iterations to pass
+- Cost per PRD
+- Regression detection across agent versions
+
+```bash
+api-generator-agent bench --suite standard --provider openai
+```
+
+**Implementation notes:**
+- `examples/` directory already has PRDs to use as benchmarks
+- Record results to `benchmarks/{date}/{provider}.json`
+- Compare against previous runs to detect regressions
+- Run in CI on release branches
+
+---
+
+## Priority Summary
+
+| Tier | Items | Theme |
+|---|---|---|
+| **P2** | Docker image, YAML config, sandbox, checkpoints, OTEL, .agentignore, update checker, changelog, permission gates | Catch up with mature agents |
+| **P3** | Interactive REPL, browser preview, VS Code extension, voice input, multi-framework, benchmark harness | Stand-alone differentiators |
+
+## Implementation Order (Recommended)
+
+Within P2, tackle in this order based on user impact vs effort:
+
+1. **YAML config + `init` command** — most requested by users setting up projects
+2. **Docker image** — unlocks CI/CD use cases and zero-install trial
+3. **`.agentignore`** — quick win, prevents context bloat
+4. **Update checker** — small effort, improves upgrade adoption
+5. **CHANGELOG.md** — pairs with the release workflow
+6. **Checkpoint/rollback** — leverages existing iteration snapshots
+7. **Sandboxed execution** — requires Docker-in-Docker, more complex
+8. **Permission gates** — useful but non-default, lower priority
+9. **OTEL export** — valuable for enterprise users, but LangSmith covers most cases today
diff --git a/docs/getting-started.md b/docs/getting-started.md
new file mode 100644
index 0000000..49edcf7
--- /dev/null
+++ b/docs/getting-started.md
@@ -0,0 +1,349 @@
+# Getting Started
+
+A step-by-step walkthrough for generating your first API from a PRD.
+
+## Step 1: Install Dependencies
+
+```bash
+cd api-generator-agent
+bun install
+```
+
+## Step 2: Start Docker
+
+The agent spins up a MongoDB container to run real integration tests against your generated code. Make sure Docker is running:
+
+```bash
+docker info
+```
+
+If you see connection errors, start Docker Desktop (Windows/Mac) or the Docker daemon (Linux).
+
+## Step 3: Configure Your LLM Provider
+
+Create a `.env` file in the project root. Pick one of the three provider options below.
+
+### Option A: Ollama (local, free)
+
+Install [Ollama](https://ollama.com), pull a model, then:
+
+```bash
+# .env
+LLM_PROVIDER=ollama
+OLLAMA_HOST=http://localhost:11434
+```
+
+The agent uses these Ollama models by default:
+- `qwen3.5:27b` for planning and documentation
+- `qwen3-coder-next` for code generation and QA
+
+Pull them before your first run:
+
+```bash
+ollama pull qwen3.5:27b
+ollama pull qwen3-coder-next
+```
+
+### Option B: OpenAI (cloud)
+
+```bash
+# .env
+LLM_PROVIDER=openai
+OPENAI_API_KEY=sk-your-key-here
+```
+
+### Option C: Anthropic (cloud)
+
+```bash
+# .env
+LLM_PROVIDER=anthropic
+ANTHROPIC_API_KEY=sk-ant-your-key-here
+```
+
+### Adding Fallback Tiers (optional)
+
+Set additional API keys to enable fallback escalation. The primary provider is used first. If a task still fails after `MAX_FIX_ITERATIONS` fix attempts on the primary provider, the agent escalates to the next available provider.
+
+```bash
+# .env — primary is OpenAI, falls back to Anthropic
+LLM_PROVIDER=openai
+OPENAI_API_KEY=sk-...
+ANTHROPIC_API_KEY=sk-ant-...
+```
+
+## Step 4: Write a PRD
+
+Create a markdown file that describes your API. The planning agent needs four things:
+
+1. **Overview** - one paragraph describing the API
+2. **Data Models** - entities with field names, types, and constraints
+3. **Endpoints** - HTTP method, path, and what it does
+4. **Business Rules** - validation, access control, domain logic
+
+A sample PRD is included at `examples/bookmark-api-prd.md`. Here is a stripped-down version to illustrate the minimum viable PRD:
+
+```markdown
+# Notes API
+
+## Overview
+A simple notes API. Users register, log in, and manage personal notes.
+
+## Data Models
+
+### User
+- id: UUID (auto-generated)
+- email: string (unique, required)
+- name: string (required)
+- createdAt: datetime
+
+### Note
+- id: UUID (auto-generated)
+- userId: UUID (foreign key to User)
+- title: string (required, max 200 chars)
+- body: string (optional)
+- createdAt: datetime
+- updatedAt: datetime
+
+## Endpoints
+
+### Auth
+- POST /api/v1/auth/register - Register (email, name, password)
+- POST /api/v1/auth/login - Login, returns JWT
+
+### Notes (auth required)
+- GET /api/v1/notes - List notes for authenticated user (pagination)
+- POST /api/v1/notes - Create a note
+- GET /api/v1/notes/:id - Get a note
+- PUT /api/v1/notes/:id - Update a note
+- DELETE /api/v1/notes/:id - Delete a note
+
+## Business Rules
+- Users can only access their own notes
+- Pagination: page=1, limit=20, max 100
+
+## Non-Functional
+- Health check at GET /healthz
+```
+
+Save this as `my-notes-prd.md` in the project root.
+
+## Step 5: Run the Agent
+
+The agent accepts a file path, raw text, or piped stdin:
+
+```bash
+# From a file
+bun run src/index.mts my-notes-prd.md
+
+# Raw text (for quick prototyping)
+bun run src/index.mts "Build a notes API with user auth, CRUD on notes, and pagination"
+
+# Piped from stdin
+cat my-notes-prd.md | bun run src/index.mts -
+```
+
+This kicks off the full pipeline. You will see log output for each phase:
+
+```
+[info] PRD loaded from /path/to/my-notes-prd.md (812 chars)
+[info] Config: maxIterations=5, concurrency=4
+[info] LLM provider: openai
+[info] Phase 1: Planning - generating task graph from PRD
+```
+
+The planning agent produces a task graph. You will see each task listed:
+
+```
+[info] Planning complete: 6 tasks generated in 5200ms (model: gpt-5.4)
+[info]   [plan] Task: setup-foundation - "Project setup" (depends: [])
+[info]   [plan] Task: model-user - "User schema" (depends: [setup-foundation])
+[info]   [plan] Task: model-note - "Note schema" (depends: [setup-foundation])
+[info]   [plan] Task: middleware-auth - "JWT auth" (depends: [model-user])
+[info]   [plan] Task: endpoint-auth - "Auth routes" (depends: [model-user, middleware-auth])
+[info]   [plan] Task: endpoint-notes - "Notes routes" (depends: [model-note, middleware-auth])
+```
+
+Then the code generation and testing phase runs. Each task goes through the codegen -> lint -> test loop:
+
+```
+[info] Phase 2: Executing task graph
+[info]   [executor] Ready: setup-foundation
+[info]   [codegen] Trying model: gpt-5.4
+[info]   [codegen] Success with model gpt-5.4 (3100ms)
+[info]   [qa] setup-foundation: unit PASS
+[info]   [executor] Completed: setup-foundation (1 iteration)
+[info]   [executor] Ready: model-user, model-note
+```
+
+If a test fails, the agent retries with error context:
+
+```
+[info]   [codegen] Trying model: gpt-5.4 (fix attempt 2/5)
+[warn]   [qa] endpoint-auth: unit FAIL - 2 errors
+[info]   [codegen] Trying model: gpt-5.4 (fix attempt 3/5)
+[info]   [qa] endpoint-auth: unit PASS
+[info]   [executor] Completed: endpoint-auth (3 iterations)
+```
+
+Finally, the assembly and documentation phases run:
+
+```
+[info] Phase 2.25: Assembly - wiring endpoint plugins into index.mts
+[info]   [assembly] Found plugin: authRoutes in src/routes/auth.mts
+[info]   [assembly] Found plugin: noteRoutes in src/routes/notes.mts
+[info]   [assembly] Assembled index.mts with 2 plugin(s)
+
+[info] Phase 3: Generating documentation
+[info] Documentation generated successfully
+
+[info] === Pipeline Results ===
+[info] Run ID: f47ac10b-58cc-4372-a567-0e02b2c3d479
+[info] Duration: 185000ms
+[info] Documentation: generated
+[info] Tasks: 6 completed, 0 failed, 0 skipped
+[info]   [OK] setup-foundation (1 iterations)
+[info]   [OK] model-user (1 iterations)
+[info]   [OK] model-note (1 iterations)
+[info]   [OK] middleware-auth (2 iterations)
+[info]   [OK] endpoint-auth (3 iterations)
+[info]   [OK] endpoint-notes (2 iterations)
+[info] Workspace: .workspace/f47ac10b-58cc-4372-a567-0e02b2c3d479/
+```
+
+## Step 6: Inspect the Output
+
+Your generated API lives in the workspace directory. The key files:
+
+```bash
+# The assembled entry point with all routes wired
+cat .workspace/<run-id>/docs/assembled-index.mts
+
+# The task plan
+cat .workspace/<run-id>/plan.json
+
+# Individual task code
+ls .workspace/<run-id>/tasks/endpoint-notes/code/src/
+
+# Execution summary
+cat .workspace/<run-id>/execution-summary.json
+```
+
+The `docs/assembled-index.mts` file is the runnable Elysia app. It imports all generated endpoint plugins and mounts them with `.use()`.
+
+## Step 7: Run the Generated API
+
+Copy the generated code into a new project and start it:
+
+```bash
+# Create a new directory for the generated API
+mkdir my-notes-api && cd my-notes-api
+bun init -y
+
+# Copy all task code into the project
+# Each task's code is in .workspace/<run-id>/tasks/<task-id>/code/
+# The assembled index is at .workspace/<run-id>/docs/assembled-index.mts (copy it to src/index.mts)
+
+# Install the dependencies the generated code needs
+bun add elysia @sinclair/typebox mongodb jose
+
+# Start MongoDB
+docker run -d --name notes-mongo -p 27017:27017 mongo:latest
+
+# Set the connection string and run
+MONGODB_URI=mongodb://localhost:27017/notes-api bun run src/index.mts
+```
+
+The API will be available at `http://localhost:3000`. Test it:
+
+```bash
+# Health check
+curl http://localhost:3000/healthz
+
+# Register a user
+curl -X POST http://localhost:3000/api/v1/auth/register \
+  -H "Content-Type: application/json" \
+  -d '{"email": "user@example.com", "name": "Test User", "password": "secret123"}'
+
+# Login
+curl -X POST http://localhost:3000/api/v1/auth/login \
+  -H "Content-Type: application/json" \
+  -d '{"email": "user@example.com", "password": "secret123"}'
+# Returns: { "data": { "token": "eyJ..." } }
+
+# Create a note (use the token from login)
+curl -X POST http://localhost:3000/api/v1/notes \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer eyJ..." \
+  -d '{"title": "My first note", "body": "Hello world"}'
+
+# List notes
+curl http://localhost:3000/api/v1/notes \
+  -H "Authorization: Bearer eyJ..."
+```
+
+## Step 8: Import the Hoppscotch Collection (optional)
+
+The agent generates a Hoppscotch collection at `.workspace/<run-id>/docs/hoppscotch-collection.json`. To use it:
+
+1. Open [Hoppscotch](https://hoppscotch.io)
+2. Click **Import** in the collections sidebar
+3. Select the `hoppscotch-collection.json` file
+4. All endpoints are pre-configured with request bodies and headers
+
+## Tuning Tips
+
+### Faster Runs
+
+Lower the fix-iteration cap and raise concurrency (default: 4) if you want a quick prototype:
+
+```bash
+bun run src/index.mts my-prd.md 2 5
+#                                ^ ^
+#                                | max 5 tasks
+#                                max 2 fix iterations
+```
+
+### More Reliable Runs
+
+Increase iterations for complex PRDs:
+
+```bash
+MAX_FIX_ITERATIONS=10 bun run src/index.mts beautician-scheduling-prd.md
+```
+
+### Debugging Failures
+
+If a task fails, check its artifacts:
+
+```bash
+# See what errors the QA agent found
+cat .workspace/<run-id>/tasks/<task-id>/qa-results.json
+
+# See the accumulated knowledge (what was tried)
+cat .workspace/<run-id>/tasks/<task-id>/qa-knowledge.md
+
+# See code from each iteration
+ls .workspace/<run-id>/tasks/<task-id>/iterations/
+```
+
+### Re-running Without Re-planning
+
+If you change only pipeline settings (not the PRD), the cached plan is reused automatically. To force a fresh plan:
+
+```bash
+rm -rf .workspace/.plan-cache/
+```
+
+## Example PRDs
+
+| PRD | Complexity | Entities | Endpoints | Description |
+|---|---|---|---|---|
+| `examples/bookmark-api-prd.md` | Medium | 3 (User, Folder, Bookmark) | 14 | Bookmark manager with folders, tags, and search |
+| `sample-prd.md` | Simple | 2 (User, Todo) | 9 | Classic todo app with priorities and pagination |
+| `beautician-scheduling-prd.md` | Complex | 6 (Tenant, Customer, Service, Availability, Appointment, DiscountCode) | 20+ | Multi-tenant appointment scheduling with grace periods and discount codes |
+
+Start with the bookmark API PRD if this is your first time:
+
+```bash
+bun run src/index.mts examples/bookmark-api-prd.md
+```
diff --git a/examples/bookmark-api-prd.md b/examples/bookmark-api-prd.md
new file mode 100644
index 0000000..2c7ea09
--- /dev/null
+++ b/examples/bookmark-api-prd.md
@@ -0,0 +1,76 @@
+# Bookmark Manager API - Product Requirements Document
+
+## Overview
+
+Build a bookmark manager API where users can save, organize, and search web bookmarks. Users register an account, create folders to organize bookmarks, and tag bookmarks for easy filtering. The API supports full CRUD on bookmarks with search by title, tag, and folder.
+
+## Data Models
+
+### User
+- id: UUID (auto-generated)
+- email: string (unique, required)
+- displayName: string (required)
+- passwordHash: string
+- createdAt: datetime
+
+### Folder
+- id: UUID (auto-generated)
+- userId: UUID (foreign key to User, required)
+- name: string (required, max 100 chars)
+- parentFolderId: UUID (optional, self-referencing for nested folders)
+- createdAt: datetime
+
+### Bookmark
+- id: UUID (auto-generated)
+- userId: UUID (foreign key to User, required)
+- folderId: UUID (optional, foreign key to Folder)
+- url: string (required, valid URL)
+- title: string (required, max 200 chars)
+- description: string (optional, max 1000 chars)
+- tags: string array (default empty)
+- isFavorite: boolean (default false)
+- createdAt: datetime
+- updatedAt: datetime
+
+## Endpoints
+
+### Auth
+- POST /api/v1/auth/register - Register a new user (email, displayName, password)
+- POST /api/v1/auth/login - Login and receive a JWT token
+
+### Users
+- GET /api/v1/users/me - Get current user profile (requires auth)
+
+### Folders (auth required)
+- GET /api/v1/folders - List all folders for the authenticated user
+- POST /api/v1/folders - Create a folder (name, optional parentFolderId)
+- PUT /api/v1/folders/:id - Rename a folder
+- DELETE /api/v1/folders/:id - Delete a folder (moves contained bookmarks to unfiled)
+
+### Bookmarks (auth required)
+- GET /api/v1/bookmarks - List bookmarks with pagination and optional filters (folderId, tag, isFavorite, search)
+- POST /api/v1/bookmarks - Create a bookmark (url, title, optional description, tags, folderId)
+- GET /api/v1/bookmarks/:id - Get a single bookmark
+- PUT /api/v1/bookmarks/:id - Update a bookmark
+- DELETE /api/v1/bookmarks/:id - Delete a bookmark
+- PATCH /api/v1/bookmarks/:id/favorite - Toggle favorite status
+
+### Tags (auth required)
+- GET /api/v1/tags - List all unique tags used by the authenticated user with counts
+
+## Business Rules
+
+1. Users can only access their own folders, bookmarks, and tags
+2. Folder names must be unique within the same parent folder for a given user
+3. Deleting a folder does not delete its bookmarks - they become unfiled (folderId set to null)
+4. Bookmark URLs must be valid URLs (start with http:// or https://)
+5. Tags are stored as lowercase, trimmed, and deduplicated
+6. The search filter on GET /api/v1/bookmarks matches against title and description (case-insensitive)
+7. Pagination defaults: page=1, limit=20, max limit=100
+8. The tags endpoint returns each unique tag with a count of how many bookmarks use it
+
+## Non-Functional
+
+- All responses in JSON
+- Health check at GET /healthz
+- Standard error responses with statusCode and message
diff --git a/package.json b/package.json
index 7d03a6e..202d4a7 100644
--- a/package.json
+++ b/package.json
@@ -1,8 +1,20 @@
 {
   "name": "api-generator-agent",
-  "module": "index.ts",
+  "version": "0.1.0",
+  "description": "Multi-agent pipeline that generates production-ready Elysia APIs from PRDs",
+  "module": "src/index.mts",
   "type": "module",
-  "private": true,
+  "bin": {
+    "api-generator-agent": "src/index.mts"
+  },
+  "keywords": ["ai", "code-generation", "elysia", "bun", "api", "agent", "llm"],
+  "license": "ISC",
+  "files": [
+    "src",
+    "docs",
+    "examples",
+    "README.md"
+  ],
   "devDependencies": {
     "@types/bun": "latest",
     "playwright": "^1.59.1"
@@ -21,6 +33,7 @@
     "langchain": "^1.3.0",
     "langsmith": "^0.5.16",
     "ollama": "^0.6.3",
+    "picocolors": "^1.1.1",
     "winston": "^3.19.0",
     "zod": "^4.3.6",
     "zod-to-json-schema": "^3.25.2"
diff --git a/src/agents/base-agent.mts b/src/agents/base-agent.mts
index 0fbb39c..2160fd9 100644
--- a/src/agents/base-agent.mts
+++ b/src/agents/base-agent.mts
@@ -4,24 +4,30 @@ import type { AgentInput, AgentOutput } from '../types/agent-context.mts';
 import type { Result } from '../types/result.mts';
 import { ok, err } from '../types/result.mts';
 import type { AgentRole, ModelChainConfig } from '../config/models.mts';
-import type { OllamaFactory } from '../llm/ollama-factory.mts';
+import type { ILlmFactory } from '../interfaces/i-llm-factory.mjs';
 import { createTraceConfig } from '../llm/tracing.mts';
 import { ThinkingSpinner } from '../llm/thinking-spinner.mts';
 import { withTimeout, LlmTimeoutError } from '../llm/with-timeout.mts';
 
+export interface TokenUsage {
+  inputTokens: number; // agents copy this from streamInvokeWithUsage(...).inputTokens
+  outputTokens: number; // agents copy this from streamInvokeWithUsage(...).outputTokens
+}
+
 export abstract class BaseAgent<TIn, TOut> {
 
   protected readonly role: AgentRole;
   protected readonly modelChain: ModelChainConfig;
-  protected readonly llmFactory: OllamaFactory;
+  protected readonly llmFactory: ILlmFactory;
   protected readonly logger: Logger;
   protected readonly timeoutMs: number;
   protected readonly useThinking: boolean;
+  protected _lastTokenUsage: TokenUsage = { inputTokens: 0, outputTokens: 0 };
 
   constructor(
     role: AgentRole,
     modelChain: ModelChainConfig,
-    llmFactory: OllamaFactory,
+    llmFactory: ILlmFactory,
     logger: Logger,
     timeoutMs: number = 1800000,
     useThinking: boolean = false,
@@ -57,6 +63,7 @@ export abstract class BaseAgent<TIn, TOut> {
 
       try {
         this.logger.info(`[${this.role}] Timeout set to ${Math.round(this.timeoutMs / 1000)}s`);
+        this._lastTokenUsage = { inputTokens: 0, outputTokens: 0 };
         const result = await withTimeout(
           this.execute(input, chatModel, traceConfig),
           model,
@@ -74,6 +81,8 @@ export abstract class BaseAgent<TIn, TOut> {
             payload: result.value,
             modelUsed: model,
             durationMs,
+            inputTokens: this._lastTokenUsage.inputTokens,
+            outputTokens: this._lastTokenUsage.outputTokens,
           });
         }
 
@@ -117,6 +126,7 @@ export abstract class BaseAgent<TIn, TOut> {
     spinner.start();
 
     try {
+      this._lastTokenUsage = { inputTokens: 0, outputTokens: 0 };
       const result = await withTimeout(
         this.execute(input, chatModel, traceConfig),
         modelName,
@@ -133,6 +143,8 @@ export abstract class BaseAgent<TIn, TOut> {
           payload: result.value,
           modelUsed: modelName,
           durationMs,
+          inputTokens: this._lastTokenUsage.inputTokens,
+          outputTokens: this._lastTokenUsage.outputTokens,
         });
       }
 
diff --git a/src/agents/codegen-agent.mts b/src/agents/codegen-agent.mts
index 783a3a9..cdd9d8a 100644
--- a/src/agents/codegen-agent.mts
+++ b/src/agents/codegen-agent.mts
@@ -6,13 +6,13 @@ import type { AgentInput } from '../types/agent-context.mts';
 import type { Result } from '../types/result.mts';
 import { ok, err } from '../types/result.mts';
 import type { ModelChainConfig } from '../config/models.mts';
-import type { OllamaFactory } from '../llm/ollama-factory.mts';
+import type { ILlmFactory } from '../interfaces/i-llm-factory.mjs';
 import {
   CODEGEN_SYSTEM_PROMPT,
   createCodegenUserPrompt,
   createFixPrompt,
 } from '../prompts/codegen.mts';
-import { streamInvoke } from '../llm/stream-invoke.mts';
+import { streamInvokeWithUsage } from '../llm/stream-invoke.mts';
 
 export const NO_CODE_BLOCKS_ERROR = `No code blocks found in codegen response`;
 
@@ -37,7 +37,7 @@ export type CodegenOutput = readonly CodeFile[];
 
 export class CodegenAgent extends BaseAgent<CodegenInput, CodegenOutput> {
 
-  constructor(modelChain: ModelChainConfig, llmFactory: OllamaFactory, logger: Logger, timeoutMs?: number) {
+  constructor(modelChain: ModelChainConfig, llmFactory: ILlmFactory, logger: Logger, timeoutMs?: number) {
     super('codegen', modelChain, llmFactory, logger, timeoutMs);
   }
 
@@ -70,7 +70,9 @@ export class CodegenAgent extends BaseAgent<CodegenInput, CodegenOutput> {
     ];
 
     this.logger.info(`[codegen] Sending prompt to LLM (${userPrompt.length} chars) (streaming)`);
-    const content = await streamInvoke(chatModel, messages, traceConfig);
+    const streamResult = await streamInvokeWithUsage(chatModel, messages, traceConfig);
+    const content = streamResult.content;
+    this._lastTokenUsage = { inputTokens: streamResult.inputTokens, outputTokens: streamResult.outputTokens };
 
     this.logger.debug(`[codegen] LLM response received (${content.length} chars)`);
 
diff --git a/src/agents/documentation-agent.mts b/src/agents/documentation-agent.mts
index 5af65c5..4237812 100644
--- a/src/agents/documentation-agent.mts
+++ b/src/agents/documentation-agent.mts
@@ -6,12 +6,12 @@ import type { AgentInput } from '../types/agent-context.mts';
 import type { Result } from '../types/result.mts';
 import { ok, err } from '../types/result.mts';
 import type { ModelChainConfig } from '../config/models.mts';
-import type { OllamaFactory } from '../llm/ollama-factory.mts';
+import type { ILlmFactory } from '../interfaces/i-llm-factory.mjs';
 import {
   DOCUMENTATION_SYSTEM_PROMPT,
   createDocumentationUserPrompt,
 } from '../prompts/documentation.mts';
-import { streamInvoke } from '../llm/stream-invoke.mts';
+import { streamInvokeWithUsage } from '../llm/stream-invoke.mts';
 
 export interface HoppscotchCollection {
   readonly v: number;
@@ -22,7 +22,7 @@ export interface HoppscotchCollection {
 
 export class DocumentationAgent extends BaseAgent<string, HoppscotchCollection> {
 
-  constructor(modelChain: ModelChainConfig, llmFactory: OllamaFactory, logger: Logger, timeoutMs?: number) {
+  constructor(modelChain: ModelChainConfig, llmFactory: ILlmFactory, logger: Logger, timeoutMs?: number) {
     super('documentation', modelChain, llmFactory, logger, timeoutMs);
   }
 
@@ -39,7 +39,9 @@ export class DocumentationAgent extends BaseAgent<string, HoppscotchCollection>
     ];
 
     this.logger.info('[docs] Sending code to LLM for documentation generation (streaming)');
-    const content = await streamInvoke(chatModel, messages, traceConfig);
+    const streamResult = await streamInvokeWithUsage(chatModel, messages, traceConfig);
+    const content = streamResult.content;
+    this._lastTokenUsage = { inputTokens: streamResult.inputTokens, outputTokens: streamResult.outputTokens };
 
     this.logger.debug(`[docs] LLM response received (${content.length} chars)`);
 
diff --git a/src/agents/planning-agent.mts b/src/agents/planning-agent.mts
index 971085b..b3f5610 100644
--- a/src/agents/planning-agent.mts
+++ b/src/agents/planning-agent.mts
@@ -8,9 +8,9 @@ import type { TaskGraph, Task } from '../types/task.mts';
 import type { Result } from '../types/result.mts';
 import { ok, err } from '../types/result.mts';
 import type { ModelChainConfig } from '../config/models.mts';
-import type { OllamaFactory } from '../llm/ollama-factory.mts';
+import type { ILlmFactory } from '../interfaces/i-llm-factory.mjs';
 import { PLANNING_SYSTEM_PROMPT, createPlanningUserPrompt } from '../prompts/planning.mts';
-import { streamInvoke } from '../llm/stream-invoke.mts';
+import { streamInvokeWithUsage } from '../llm/stream-invoke.mts';
 
 const taskSchema = z.object({
   id: z.string(),
@@ -27,7 +27,7 @@ const planResponseSchema = z.object({
 
 export class PlanningAgent extends BaseAgent<string, TaskGraph> {
 
-  constructor(modelChain: ModelChainConfig, llmFactory: OllamaFactory, logger: Logger, timeoutMs?: number) {
+  constructor(modelChain: ModelChainConfig, llmFactory: ILlmFactory, logger: Logger, timeoutMs?: number) {
     super('planning', modelChain, llmFactory, logger, timeoutMs);
   }
 
@@ -44,7 +44,9 @@ export class PlanningAgent extends BaseAgent<string, TaskGraph> {
     ];
 
     this.logger.info('[planning] Sending PRD to LLM for task decomposition (streaming)');
-    const content = await streamInvoke(chatModel, messages, traceConfig);
+    const streamResult = await streamInvokeWithUsage(chatModel, messages, traceConfig);
+    const content = streamResult.content;
+    this._lastTokenUsage = { inputTokens: streamResult.inputTokens, outputTokens: streamResult.outputTokens };
 
     this.logger.debug(`[planning] LLM response received (${content.length} chars)`);
 
diff --git a/src/agents/qa-agent.mts b/src/agents/qa-agent.mts
index c8e55f7..386e0fc 100644
--- a/src/agents/qa-agent.mts
+++ b/src/agents/qa-agent.mts
@@ -7,7 +7,7 @@ import type { AgentInput } from '../types/agent-context.mts';
 import type { Result } from '../types/result.mts';
 import { ok, err } from '../types/result.mts';
 import type { ModelChainConfig } from '../config/models.mts';
-import type { OllamaFactory } from '../llm/ollama-factory.mts';
+import type { ILlmFactory } from '../interfaces/i-llm-factory.mjs';
 import { appendKnowledge, analyzeTestErrors } from './qa-knowledge.mts';
 import type { CodeFile } from './codegen-agent.mts';
 
@@ -43,7 +43,7 @@ export interface QaResult {
 
 export class QaAgent extends BaseAgent<QaInput, QaResult> {
 
-  constructor(modelChain: ModelChainConfig, llmFactory: OllamaFactory, logger: Logger, timeoutMs?: number) {
+  constructor(modelChain: ModelChainConfig, llmFactory: ILlmFactory, logger: Logger, timeoutMs?: number) {
     super(`qa`, modelChain, llmFactory, logger, timeoutMs, false);
   }
 
diff --git a/src/config/env.mts b/src/config/env.mts
index 6802e6f..19604c1 100644
--- a/src/config/env.mts
+++ b/src/config/env.mts
@@ -1,9 +1,13 @@
 import { z } from 'zod';
 
+export const LLM_PROVIDERS = ['ollama', 'openai', 'anthropic'] as const;
+export type LlmProvider = (typeof LLM_PROVIDERS)[number];
+
 const envSchema = z.object({
+  LLM_PROVIDER: z.enum(LLM_PROVIDERS).default('ollama'),
   ANTHROPIC_API_KEY: z.string().optional(),
   OPENAI_API_KEY: z.string().optional(),
-  OLLAMA_HOST: z.string().url().default(`http://192.168.128.230:11434`),
+  OLLAMA_HOST: z.string().url().default(`http://localhost:11434`),
   OLLAMA_API_KEY: z.string().optional(),
   LANGSMITH_TRACING: z
     .string()
@@ -17,6 +21,22 @@ const envSchema = z.object({
   WORKSPACE_DIR: z.string().default('.workspace'),
   LLM_TIMEOUT_MS: z.coerce.number().int().min(10000).max(3600000).default(1800000),
   INTEGRATION_PORT: z.coerce.number().int().min(1024).max(65535).default(4100),
+  TASK_COST_LIMIT: z.coerce.number().min(0).default(3.00),
+}).superRefine((data, ctx) => {
+  if (data.LLM_PROVIDER === 'openai' && !data.OPENAI_API_KEY) {
+    ctx.addIssue({
+      code: z.ZodIssueCode.custom,
+      path: ['OPENAI_API_KEY'],
+      message: 'OPENAI_API_KEY is required when LLM_PROVIDER is "openai"',
+    });
+  }
+  if (data.LLM_PROVIDER === 'anthropic' && !data.ANTHROPIC_API_KEY) {
+    ctx.addIssue({
+      code: z.ZodIssueCode.custom,
+      path: ['ANTHROPIC_API_KEY'],
+      message: 'ANTHROPIC_API_KEY is required when LLM_PROVIDER is "anthropic"',
+    });
+  }
 });
 
 export type EnvConfig = z.infer<typeof envSchema>;
diff --git a/src/config/models.mts b/src/config/models.mts
index b309c49..a738239 100644
--- a/src/config/models.mts
+++ b/src/config/models.mts
@@ -1,3 +1,5 @@
+import type { LlmProvider } from './env.mjs';
+
 export type AgentRole = 'planning' | 'codegen' | 'documentation' | 'qa';
 
 export interface ModelChainConfig {
@@ -5,21 +7,25 @@ export interface ModelChainConfig {
   readonly temperature: number;
 }
 
-export const MODEL_CHAINS: Readonly<Record<AgentRole, ModelChainConfig>> = {
-  planning: {
-    models: ['qwen3.5:27b'],
-    temperature: 0.3,
-  },
-  codegen: {
-    models: [`qwen3-coder-next`],
-    temperature: 0.2,
+export const PROVIDER_MODEL_MAP: Readonly<Record<LlmProvider, Record<AgentRole, ModelChainConfig>>> = {
+  ollama: {
+    planning: { models: ['qwen3.5:27b'], temperature: 0.3 },
+    codegen: { models: ['qwen3-coder-next'], temperature: 0.2 },
+    documentation: { models: ['qwen3.5:27b'], temperature: 0.1 },
+    qa: { models: ['qwen3-coder-next'], temperature: 0.2 },
   },
-  documentation: {
-    models: ['qwen3.5:27b'],
-    temperature: 0.1,
+  openai: {
+    planning: { models: ['gpt-5.4'], temperature: 0.3 },
+    codegen: { models: ['gpt-5.4'], temperature: 0.2 },
+    documentation: { models: ['gpt-5.4'], temperature: 0.1 },
+    qa: { models: ['gpt-5.4'], temperature: 0.2 },
   },
-  qa: {
-    models: [`qwen3-coder-next`],
-    temperature: 0.2,
+  anthropic: {
+    planning: { models: ['claude-sonnet-4-6'], temperature: 0.3 },
+    codegen: { models: ['claude-sonnet-4-6'], temperature: 0.2 },
+    documentation: { models: ['claude-sonnet-4-6'], temperature: 0.1 },
+    qa: { models: ['claude-sonnet-4-6'], temperature: 0.2 },
   },
-} as const;
+};
+
+export const MODEL_CHAINS: Readonly<Record<AgentRole, ModelChainConfig>> = PROVIDER_MODEL_MAP.ollama;
diff --git a/src/container/di.mts b/src/container/di.mts
index acd4222..b0c6e69 100644
--- a/src/container/di.mts
+++ b/src/container/di.mts
@@ -2,22 +2,26 @@ import winston from 'winston';
 import type { Logger } from 'winston';
 import type { EnvConfig } from '../config/env.mts';
 import type { PipelineConfig } from '../types/pipeline.mts';
-import { MODEL_CHAINS } from '../config/models.mts';
+import { PROVIDER_MODEL_MAP } from '../config/models.mts';
+import { redactSecrets } from '../llm/redact-secrets.mts';
+import { CostTracker } from '../llm/cost-tracker.mts';
+import pc from 'picocolors';
+import type { ILlmFactory } from '../interfaces/i-llm-factory.mjs';
 import { OllamaFactory } from '../llm/ollama-factory.mts';
+import { AnthropicFactory } from '../llm/anthropic-factory.mts';
+import { OpenAIFactory } from '../llm/openai-factory.mts';
 import { PlanningAgent } from '../agents/planning-agent.mts';
 import { CodegenAgent } from '../agents/codegen-agent.mts';
 import { EslintAgent } from '../agents/eslint-agent.mts';
 import { QaAgent } from '../agents/qa-agent.mts';
 import { DocumentationAgent } from '../agents/documentation-agent.mts';
-import { AnthropicFactory } from '../llm/anthropic-factory.mts';
-import { OpenAIFactory } from '../llm/openai-factory.mts';
 import type { FallbackTier } from '../config/fallback-tiers.mts';
 
 export interface Container {
 
   readonly logger: Logger;
-  readonly localFactory: OllamaFactory;
-  readonly codegenFactory: OllamaFactory;
+  readonly primaryFactory: ILlmFactory;
+  readonly codegenFactory: ILlmFactory;
   readonly planningAgent: PlanningAgent;
   readonly codegenAgent: CodegenAgent;
   readonly eslintAgent: EslintAgent;
@@ -25,12 +29,64 @@ export interface Container {
   readonly documentationAgent: DocumentationAgent;
   readonly pipelineConfig: PipelineConfig;
   readonly fallbackTiers: readonly FallbackTier[];
+  readonly costTracker: CostTracker;
+}
+
+/** Creates the LLM factory for `env.LLM_PROVIDER`; throws if that provider's API key is unset. */
+function createPrimaryFactory(env: EnvConfig, logger: Logger): ILlmFactory {
+  switch (env.LLM_PROVIDER) {
+    case 'ollama':
+      logger.info(`Primary LLM: Ollama (${env.OLLAMA_HOST})`);
+      return new OllamaFactory({ host: env.OLLAMA_HOST });
+    case 'openai':
+      if (!env.OPENAI_API_KEY) throw new Error('OPENAI_API_KEY is required when LLM_PROVIDER is "openai"');
+      logger.info(`Primary LLM: OpenAI`);
+      return new OpenAIFactory({ apiKey: env.OPENAI_API_KEY });
+    case 'anthropic':
+      if (!env.ANTHROPIC_API_KEY) throw new Error('ANTHROPIC_API_KEY is required when LLM_PROVIDER is "anthropic"');
+      logger.info(`Primary LLM: Anthropic`);
+      return new AnthropicFactory({ apiKey: env.ANTHROPIC_API_KEY });
+  }
+}
+
+/**
+ * Builds the escalation tiers from every cloud provider that has an API key
+ * configured and is not already the primary provider. OpenAI is pushed before
+ * Anthropic; a warning is logged when the resulting list is empty.
+ */
+function buildFallbackTiers(env: EnvConfig, logger: Logger): FallbackTier[] {
+  const primary = env.LLM_PROVIDER;
+  const tiers: FallbackTier[] = [];
+
+  const openaiKey = env.OPENAI_API_KEY;
+  if (openaiKey && primary !== 'openai') {
+    const factory = new OpenAIFactory({ apiKey: openaiKey });
+    const model = 'gpt-5.4';
+    tiers.push({ name: model, model, maxIterations: 16, createChatModel: () => factory.create(model, 0.2) });
+    logger.info(`Fallback tier: gpt-5.4 (16 iterations)`);
+  }
+
+  const anthropicKey = env.ANTHROPIC_API_KEY;
+  if (anthropicKey && primary !== 'anthropic') {
+    const factory = new AnthropicFactory({ apiKey: anthropicKey });
+    const model = 'claude-sonnet-4-6';
+    tiers.push({ name: model, model, maxIterations: 16, createChatModel: () => factory.create(model, 0.2) });
+    logger.info(`Fallback tier: claude-sonnet-4-6 (16 iterations)`);
+  }
+
+  if (tiers.length > 0) {
+    return tiers;
+  }
+  // Nothing to escalate to: surface it, but still return the (empty) list.
+  logger.warn('No fallback tiers configured — set additional API keys for escalation');
+  return tiers;
+}
 
 export function createContainer(env: EnvConfig): Container {
   const logger = winston.createLogger({
     level: 'info',
     format: winston.format.combine(
+      redactSecrets(),
       winston.format.timestamp(),
       winston.format.json(),
     ),
@@ -38,41 +94,58 @@ export function createContainer(env: EnvConfig): Container {
     transports: [
       new winston.transports.Console({
         format: winston.format.combine(
-          winston.format.colorize(),
           winston.format.printf(({ timestamp, level, message, ...meta }) => {
             const metaStr = Object.keys(meta).length > 1
-              ? ` ${JSON.stringify(meta)}`
+              ? ` ${pc.dim(JSON.stringify(meta))}`
               : '';
-            return `${String(timestamp)} [${level}] ${String(message)}${metaStr}`;
+            const ts = pc.dim(String(timestamp));
+            const msg = String(message);
+            let coloredLevel: string;
+            switch (level) {
+              case 'error': coloredLevel = pc.red(pc.bold('ERR')); break;
+              case 'warn': coloredLevel = pc.yellow('WRN'); break;
+              case 'debug': coloredLevel = pc.gray('DBG'); break;
+              default: coloredLevel = pc.green('INF'); break;
+            }
+            // Highlight phase headers
+            const coloredMsg = msg.startsWith('Phase ') || msg.startsWith('===')
+              ? pc.cyan(pc.bold(msg))
+              : msg.includes('[OK]') ? pc.green(msg)
+              : msg.includes('[FAIL]') ? pc.red(msg)
+              : msg.includes('[SKIP]') ? pc.yellow(msg)
+              : msg;
+            return `${ts} ${coloredLevel} ${coloredMsg}${metaStr}`;
           }),
         ),
       }),
     ],
   });
 
-  // Local Ollama for planning, QA, docs (qwen3.5:27b)
-  const localFactory = new OllamaFactory({ host: env.OLLAMA_HOST });
-  logger.info(`Local Ollama: ${env.OLLAMA_HOST}`);
+  const provider = env.LLM_PROVIDER;
+  const modelChains = PROVIDER_MODEL_MAP[provider];
 
-  // Cloud Ollama for codegen (qwen3-coder-next) — falls back to local if no API key
-  const CLOUD_TIMEOUT_MS = 600000; // 10 minutes for cloud models
-  const codegenFactory = env.OLLAMA_API_KEY
-    ? new OllamaFactory({ host: `https://api.ollama.com`, apiKey: env.OLLAMA_API_KEY, timeoutMs: CLOUD_TIMEOUT_MS })
-    : localFactory;
-  logger.info(`Codegen Ollama: ${env.OLLAMA_API_KEY ? `https://api.ollama.com (cloud, timeout: ${CLOUD_TIMEOUT_MS / 1000}s)` : `${env.OLLAMA_HOST} (local)`}`);
+  const primaryFactory = createPrimaryFactory(env, logger);
+
+  // For Ollama, use cloud Ollama for codegen if API key is set
+  const CLOUD_TIMEOUT_MS = 600000;
+  let codegenFactory: ILlmFactory = primaryFactory;
+  if (provider === 'ollama' && env.OLLAMA_API_KEY) {
+    codegenFactory = new OllamaFactory({ host: 'https://api.ollama.com', apiKey: env.OLLAMA_API_KEY, timeoutMs: CLOUD_TIMEOUT_MS });
+    logger.info(`Codegen Ollama: https://api.ollama.com (cloud, timeout: ${CLOUD_TIMEOUT_MS / 1000}s)`);
+  }
 
   const timeoutMs = env.LLM_TIMEOUT_MS;
   logger.info(`LLM timeout set to ${Math.round(timeoutMs / 1000)}s`);
 
   const planningAgent = new PlanningAgent(
-    MODEL_CHAINS.planning,
-    localFactory,
+    modelChains.planning,
+    primaryFactory,
     logger,
     timeoutMs,
   );
 
   const codegenAgent = new CodegenAgent(
-    MODEL_CHAINS.codegen,
+    modelChains.codegen,
     codegenFactory,
     logger,
     timeoutMs,
@@ -81,61 +154,35 @@ export function createContainer(env: EnvConfig): Container {
   const eslintAgent = new EslintAgent(logger);
 
   const qaAgent = new QaAgent(
-    MODEL_CHAINS.qa,
+    modelChains.qa,
     codegenFactory,
     logger,
     timeoutMs,
   );
 
   const documentationAgent = new DocumentationAgent(
-    MODEL_CHAINS.documentation,
-    localFactory,
+    modelChains.documentation,
+    primaryFactory,
     logger,
     timeoutMs,
   );
 
-  // Build fallback tiers for codegen retry/escalation
-  const fallbackTiers: FallbackTier[] = [];
-
-  // Tier 2: OpenAI GPT-5.4
-  if (env.OPENAI_API_KEY) {
-    const openaiFactory = new OpenAIFactory({ apiKey: env.OPENAI_API_KEY });
-    fallbackTiers.push({
-      name: `gpt-5.4`,
-      model: `gpt-5.4`,
-      maxIterations: 16,
-      createChatModel: () => openaiFactory.create(`gpt-5.4`, 0.2),
-    });
-    logger.info(`Fallback Tier 2: gpt-5.4 (16 iterations)`);
-  }
-
-  // Tier 3: Claude Sonnet 4.6 via Anthropic API
-  if (env.ANTHROPIC_API_KEY) {
-    const anthropicFactory = new AnthropicFactory({ apiKey: env.ANTHROPIC_API_KEY });
-    fallbackTiers.push({
-      name: `claude-sonnet-4-6`,
-      model: `claude-sonnet-4-6`,
-      maxIterations: 16,
-      createChatModel: () => anthropicFactory.create(`claude-sonnet-4-6`, 0.2),
-    });
-    logger.info(`Fallback Tier 3: claude-sonnet-4-6 (16 iterations)`);
-  }
-
-  if (fallbackTiers.length === 0) {
-    logger.warn(`No fallback tiers configured — set OLLAMA_API_KEY and/or ANTHROPIC_API_KEY`);
-  }
+  const costTracker = new CostTracker(logger);
+  const fallbackTiers = buildFallbackTiers(env, logger);
 
   const pipelineConfig: PipelineConfig = {
     maxFixIterations: env.MAX_FIX_ITERATIONS,
     maxConcurrency: env.MAX_CONCURRENCY,
     workspaceDir: env.WORKSPACE_DIR,
-    ollamaHost: env.OLLAMA_HOST,
+    llmProvider: provider,
+    llmProviderHost: provider === 'ollama' ? env.OLLAMA_HOST : undefined,
     integrationPort: env.INTEGRATION_PORT,
+    taskCostLimit: env.TASK_COST_LIMIT,
   };
 
   return {
     logger,
-    localFactory,
+    primaryFactory,
     codegenFactory,
     planningAgent,
     codegenAgent,
@@ -144,5 +191,6 @@ export function createContainer(env: EnvConfig): Container {
     documentationAgent,
     pipelineConfig,
     fallbackTiers,
+    costTracker,
   };
 }
diff --git a/src/index.mts b/src/index.mts
index 3d3297c..9e3be4b 100644
--- a/src/index.mts
+++ b/src/index.mts
@@ -1,22 +1,97 @@
-import { readFile } from 'node:fs/promises';
+import { readFile, access } from 'node:fs/promises';
 import { resolve } from 'node:path';
 import { loadEnv } from './config/env.mts';
 import { createContainer } from './container/di.mts';
 import { runPipeline } from './orchestrator/pipeline.mts';
 
+/** Drains `Bun.stdin` to end-of-stream and returns everything read, decoded as UTF-8. */
+async function readStdin(): Promise<string> {
+  const pieces: Buffer[] = [];
+  for await (const piece of Bun.stdin.stream()) pieces.push(Buffer.from(piece));
+  const whole = Buffer.concat(pieces);
+  return whole.toString('utf-8');
+}
+
+/** True when `access(path)` succeeds, false on any failure. NOTE(review): directories also pass. */
+async function isFile(path: string): Promise<boolean> {
+  try {
+    return await access(path).then(() => true);
+  } catch {
+    return false;
+  }
+}
+
+/**
+ * Loads the PRD from stdin ("-"), an existing path, or inline text, in that order.
+ * Throws when a path-like argument (one token ending in a file extension) does not
+ * exist, so a mistyped filename is not sent to the LLM as the PRD itself.
+ */
+async function loadPrd(input: string): Promise<{ prdText: string; source: string }> {
+  if (input === '-') {
+    return { prdText: await readStdin(), source: 'stdin' };
+  }
+  const resolvedPath = resolve(input);
+  if (await isFile(resolvedPath)) {
+    return { prdText: await readFile(resolvedPath, 'utf-8'), source: resolvedPath };
+  }
+  if (/^\S+\.[A-Za-z0-9]+$/.test(input)) throw new Error(`PRD file not found: ${resolvedPath}`);
+  // Anything else (text with whitespace, or a bare word) is treated as raw PRD text.
+  return { prdText: input, source: 'inline text' };
+}
+
+interface CliFlags {
+  dryRun: boolean; // set by --dry-run: run planning only, print the tasks, then exit
+  verbose: boolean; // set by --verbose: logger level becomes 'debug'
+  quiet: boolean; // set by --quiet: logger level becomes 'warn' (ignored if verbose is set)
+}
+
+/**
+ * Splits argv into known boolean flags and positionals. Throws on an unknown `--option`
+ * so a typo such as `--dryrun` is reported instead of silently becoming the PRD text.
+ */
+function parseArgs(argv: string[]): { flags: CliFlags; positional: string[] } {
+  const flags: CliFlags = { dryRun: false, verbose: false, quiet: false };
+  const positional: string[] = [];
+  for (const arg of argv) {
+    if (arg === '--dry-run') {
+      flags.dryRun = true;
+    } else if (arg === '--verbose') {
+      flags.verbose = true;
+    } else if (arg === '--quiet') {
+      flags.quiet = true;
+    } else if (/^--[a-z][\w-]*(=\S*)?$/i.test(arg)) {
+      throw new Error(`Unknown option: ${arg}`);
+    } else {
+      positional.push(arg);
+    }
+  }
+  return { flags, positional };
+}
+
 async function main(): Promise<void> {
-  const args = process.argv.slice(2);
-  const prdPath = args[0];
-  const maxIterations = args[1] ? parseInt(args[1], 10) : undefined;
-  const maxTasks = args[2] ? parseInt(args[2], 10) : undefined;
+  const { flags, positional } = parseArgs(process.argv.slice(2));
+  const prdInput = positional[0];
+  const maxIterations = positional[1] ? parseInt(positional[1], 10) : undefined;
+  const maxTasks = positional[2] ? parseInt(positional[2], 10) : undefined;
 
-  if (!prdPath) {
-    console.error('Usage: bun run src/index.mts <prd-file> [max-iterations] [max-tasks]');
+  if (!prdInput) {
+    console.error('Usage: bun run src/index.mts [options] <prd-file-or-text> [max-iterations] [max-tasks]');
     console.error('');
     console.error('Arguments:');
-    console.error('  prd-file         Path to the PRD markdown/text file');
-    console.error('  max-iterations   Max fix loop iterations (default: 5)');
-    console.error('  max-tasks        Max tasks to run (default: all)');
+    console.error('  prd-file-or-text  Path to a PRD file, raw PRD text, or "-" for stdin');
+    console.error('  max-iterations    Max fix loop iterations (default: 5)');
+    console.error('  max-tasks         Max tasks to run (default: all)');
+    console.error('');
+    console.error('Options:');
+    console.error('  --dry-run         Show the task plan without executing');
+    console.error('  --verbose         Enable debug-level logging');
+    console.error('  --quiet           Only show warnings and errors');
+    console.error('');
+    console.error('Examples:');
+    console.error('  bun run src/index.mts my-api-prd.md');
+    console.error('  bun run src/index.mts --dry-run sample-prd.md');
+    console.error('  bun run src/index.mts "Build a notes API with auth and CRUD endpoints"');
+    console.error('  cat prd.md | bun run src/index.mts -');
     process.exit(1);
   }
 
@@ -25,21 +100,31 @@ async function main(): Promise<void> {
   const container = createContainer(env);
   const { logger, pipelineConfig } = container;
 
-  const resolvedPath = resolve(prdPath);
-  logger.info(`Reading PRD from: ${resolvedPath}`);
+  // Apply log level from CLI flags
+  if (flags.verbose) {
+    logger.level = 'debug';
+  } else if (flags.quiet) {
+    logger.level = 'warn';
+  }
 
   let prdText: string;
   try {
-    prdText = await readFile(resolvedPath, 'utf-8');
+    const loaded = await loadPrd(prdInput);
+    prdText = loaded.prdText;
+    logger.info(`PRD loaded from ${loaded.source} (${prdText.length} chars)`);
   } catch (e) {
     const msg = e instanceof Error ? e.message : String(e);
-    logger.error(`Failed to read PRD file: ${msg}`);
+    logger.error(`Failed to load PRD: ${msg}`);
+    process.exit(1);
+  }
+
+  if (prdText.trim().length === 0) {
+    logger.error('PRD input is empty');
     process.exit(1);
   }
 
-  logger.info(`PRD loaded (${prdText.length} chars)`);
   logger.info(`Config: maxIterations=${pipelineConfig.maxFixIterations}, concurrency=${pipelineConfig.maxConcurrency}`);
-  logger.info(`LLM provider: Ollama (${pipelineConfig.ollamaHost})`);
+  logger.info(`LLM provider: ${pipelineConfig.llmProvider}${pipelineConfig.llmProviderHost ? ` (${pipelineConfig.llmProviderHost})` : ''}`);
 
   let effectiveConfig = maxIterations !== undefined && !isNaN(maxIterations)
     ? { ...pipelineConfig, maxFixIterations: maxIterations }
@@ -50,6 +135,26 @@ async function main(): Promise<void> {
     logger.info(`Max tasks: ${maxTasks}`);
   }
 
+  if (flags.dryRun) {
+    logger.info('=== Dry Run Mode ===');
+    logger.info('Planning only — no code generation or testing will be performed.');
+    const planResult = await container.planningAgent.run({
+      runId: crypto.randomUUID(),
+      payload: prdText,
+      iteration: 0,
+    });
+    if (!planResult.ok) {
+      logger.error(`Planning failed: ${planResult.error.message}`);
+      process.exit(1);
+    }
+    const tasks = planResult.value.payload.tasks;
+    logger.info(`Plan: ${tasks.length} tasks`);
+    for (const task of tasks) {
+      logger.info(`  [${task.type}] ${task.id} — "${task.name}" (depends: [${task.dependsOn.join(', ')}])`);
+    }
+    process.exit(0);
+  }
+
   const result = await runPipeline(prdText, effectiveConfig, {
     planningAgent: container.planningAgent,
     codegenAgent: container.codegenAgent,
@@ -58,7 +163,8 @@ async function main(): Promise<void> {
     documentationAgent: container.documentationAgent,
     logger,
     fallbackTiers: container.fallbackTiers,
-    localFactory: container.localFactory,
+    primaryFactory: container.primaryFactory,
+    costTracker: container.costTracker,
   });
 
   if (!result.ok) {
@@ -84,6 +190,14 @@ async function main(): Promise<void> {
   }
 
   logger.info(`Workspace: .workspace/${pipeline.runId}/`);
+  logger.info(`Report: .workspace/${pipeline.runId}/report.md`);
+
+  // Cost summary
+  const costSummary = container.costTracker.getSummary();
+  logger.info(`=== Cost Summary ===`);
+  logger.info(`LLM calls: ${costSummary.callCount}`);
+  logger.info(`Tokens: ${costSummary.totalInputTokens.toLocaleString()} input, ${costSummary.totalOutputTokens.toLocaleString()} output`);
+  logger.info(`Total cost: $${costSummary.totalCost.toFixed(4)}`);
 
   if (failed > 0) {
     process.exit(1);
diff --git a/src/interfaces/i-llm-factory.mts b/src/interfaces/i-llm-factory.mts
new file mode 100644
index 0000000..719bf13
--- /dev/null
+++ b/src/interfaces/i-llm-factory.mts
@@ -0,0 +1,6 @@
+import type { BaseChatModel } from "@langchain/core/language_models/chat_models";
+
+export interface ILlmFactory { // Provider-agnostic chat-model factory; implemented in this patch by the Ollama, OpenAI and Anthropic factories.
+  create(model: string, temperature: number): BaseChatModel; // Build a chat model for `model` at the given sampling temperature.
+  createWithThinking(model: string, temperature: number): BaseChatModel; // Same signature as create(); the OpenAI/Anthropic implementations simply delegate to create().
+}
diff --git a/src/io/report-generator.mts b/src/io/report-generator.mts
new file mode 100644
index 0000000..6ed4541
--- /dev/null
+++ b/src/io/report-generator.mts
@@ -0,0 +1,198 @@
+import type { Task, TaskGraph, TaskState } from "../types/task.mts";
+import type { CostSummary } from "../llm/cost-tracker.mts";
+
+export interface ReportData { // Everything generateReport() needs to render one run's Markdown report.
+  readonly runId: string;
+  readonly durationMs: number; // Run duration in milliseconds; rendered via formatDuration().
+  readonly prdLength: number; // PRD size; the report labels it "chars".
+  readonly llmProvider: string;
+  readonly llmProviderHost?: string; // When set, shown in parentheses after the provider name.
+  readonly maxFixIterations: number;
+  readonly maxConcurrency: number;
+  readonly taskGraph: TaskGraph; // Source of the "Task Plan" table (one row per task).
+  readonly taskStates: readonly TaskState[]; // Source of the "Task Results" table and the pass/fail/skip counts.
+  readonly integrationResults: Record<string, { passed: boolean; errors: readonly string[] }>; // Keyed by task ID.
+  readonly documentationGenerated: boolean;
+  readonly costSummary?: CostSummary; // Cost section is omitted when absent or when callCount is 0.
+  readonly generatedFiles: readonly GeneratedFileEntry[]; // Listed grouped by task ID.
+  readonly assembledIndexPath?: string; // Only checked for truthiness: adds the docs/assembled-index.mts row.
+}
+
+export interface GeneratedFileEntry { // One generated file together with the ID of the task it is listed under.
+  readonly taskId: string;
+  readonly filePath: string; // Printed verbatim in the report's "Generated Files" list.
+}
+
+/** Translates a task status into its report label; unrecognised statuses are returned unchanged. */
+function statusIcon(status: string): string {
+  if (status === "completed") return "PASS";
+  if (status === "failed") return "FAIL";
+  if (status === "skipped") return "SKIP";
+  // Any other status is shown verbatim.
+  return status;
+}
+
+/** Renders a millisecond duration as "Nms", "Ns" or "Mm Ss" (seconds are rounded to the nearest whole second). */
+function formatDuration(ms: number): string {
+  if (ms < 1000) {
+    return `${ms}ms`;
+  }
+  const totalSeconds = Math.round(ms / 1000);
+  return totalSeconds < 60 ? `${totalSeconds}s` : `${Math.floor(totalSeconds / 60)}m ${totalSeconds % 60}s`;
+}
+
+/**
+ * Renders the Markdown run report for one pipeline run.
+ *
+ * Sections, in order: header table, task plan, task results, integration tests
+ * (only when results exist), cost summary (only when at least one LLM call was
+ * recorded), generated files (only when non-empty) and the output-file index.
+ * Free-text table cells (task names, error messages) have `|` escaped and line
+ * breaks collapsed to spaces so a multi-line error cannot break a table row.
+ *
+ * @param data - Snapshot of the finished run.
+ * @returns The whole report as a single newline-joined string.
+ */
+export function generateReport(data: ReportData): string {
+  const lines: string[] = [];
+  // Makes arbitrary text safe for one Markdown table cell.
+  const cell = (text: string): string => text.replace(/\|/g, "\\|").replace(/\s*\r?\n\s*/g, " ");
+  const countStatus = (status: string): number =>
+    data.taskStates.filter((s) => s.status === status).length;
+  const completed = countStatus("completed");
+  const failed = countStatus("failed");
+  const skipped = countStatus("skipped");
+  const total = data.taskStates.length;
+
+  // Header
+  lines.push(`# Pipeline Run Report`, ``);
+  lines.push(`| Field | Value |`, `|---|---|`);
+  lines.push(`| Run ID | \`${data.runId}\` |`);
+  lines.push(`| Date | ${new Date().toISOString()} |`);
+  lines.push(`| Duration | ${formatDuration(data.durationMs)} |`);
+  lines.push(`| PRD Size | ${data.prdLength.toLocaleString()} chars |`);
+  lines.push(`| LLM Provider | ${data.llmProvider}${data.llmProviderHost ? ` (${data.llmProviderHost})` : ""} |`);
+  lines.push(`| Max Iterations | ${data.maxFixIterations} |`);
+  lines.push(`| Concurrency | ${data.maxConcurrency} |`);
+  lines.push(`| Tasks | ${completed} passed, ${failed} failed, ${skipped} skipped (${total} total) |`);
+  lines.push(`| Documentation | ${data.documentationGenerated ? "Generated" : "Not generated"} |`);
+  lines.push(``);
+
+  // Task Plan
+  lines.push(`## Task Plan`, ``);
+  lines.push(`| # | Task ID | Name | Type | Dependencies |`, `|---|---|---|---|---|`);
+  data.taskGraph.tasks.forEach((task: Task, index: number) => {
+    const deps = task.dependsOn.length > 0 ? task.dependsOn.join(", ") : "none";
+    lines.push(`| ${index + 1} | \`${task.id}\` | ${cell(task.name)} | ${task.type} | ${deps} |`);
+  });
+  lines.push(``);
+
+  // Task Results
+  lines.push(`## Task Results`, ``);
+  lines.push(`| Task ID | Status | Iterations | Error |`, `|---|---|---|---|`);
+  for (const state of data.taskStates) {
+    // Truncate first, then escape, so an escape sequence is never cut in half.
+    const error = state.lastError ? cell(state.lastError.substring(0, 100)) : "-";
+    lines.push(`| \`${state.taskId}\` | ${statusIcon(state.status)} | ${state.iteration} | ${error} |`);
+  }
+  lines.push(``);
+
+  // Integration Test Results
+  const integrationEntries = Object.entries(data.integrationResults);
+  if (integrationEntries.length > 0) {
+    lines.push(`## Integration Tests`, ``);
+    lines.push(`| Task ID | Result | Errors |`, `|---|---|---|`);
+    for (const [taskId, result] of integrationEntries) {
+      const errors = result.errors.length > 0 ? cell(result.errors.join("; ").substring(0, 120)) : "-";
+      lines.push(`| \`${taskId}\` | ${result.passed ? "PASS" : "FAIL"} | ${errors} |`);
+    }
+    lines.push(``);
+  }
+
+  // Cost Summary
+  if (data.costSummary && data.costSummary.callCount > 0) {
+    lines.push(`## Cost Summary`, ``);
+    lines.push(`| Metric | Value |`, `|---|---|`);
+    lines.push(`| LLM Calls | ${data.costSummary.callCount} |`);
+    lines.push(`| Input Tokens | ${data.costSummary.totalInputTokens.toLocaleString()} |`);
+    lines.push(`| Output Tokens | ${data.costSummary.totalOutputTokens.toLocaleString()} |`);
+    lines.push(`| Total Cost | $${data.costSummary.totalCost.toFixed(4)} |`);
+    lines.push(``);
+
+    // Per-model breakdown
+    const modelMap = new Map<string, { calls: number; input: number; output: number; cost: number }>();
+    for (const usage of data.costSummary.usages) {
+      const existing = modelMap.get(usage.model) ?? { calls: 0, input: 0, output: 0, cost: 0 };
+      existing.calls++;
+      existing.input += usage.inputTokens;
+      existing.output += usage.outputTokens;
+      existing.cost += usage.cost;
+      modelMap.set(usage.model, existing);
+    }
+
+    if (modelMap.size > 1) {
+      lines.push(`### Per-Model Breakdown`, ``);
+      lines.push(`| Model | Calls | Input Tokens | Output Tokens | Cost |`, `|---|---|---|---|---|`);
+      for (const [model, stats] of modelMap) {
+        lines.push(`| ${model} | ${stats.calls} | ${stats.input.toLocaleString()} | ${stats.output.toLocaleString()} | $${stats.cost.toFixed(4)} |`);
+      }
+      lines.push(``);
+    }
+
+    // Per-task cost
+    const taskCostMap = new Map<string, number>();
+    for (const usage of data.costSummary.usages) {
+      if (usage.taskId) {
+        taskCostMap.set(usage.taskId, (taskCostMap.get(usage.taskId) ?? 0) + usage.cost);
+      }
+    }
+
+    if (taskCostMap.size > 0) {
+      lines.push(`### Per-Task Cost`, ``);
+      lines.push(`| Task ID | Cost |`, `|---|---|`);
+      // Most expensive task first.
+      for (const [taskId, cost] of [...taskCostMap.entries()].sort((a, b) => b[1] - a[1])) {
+        lines.push(`| \`${taskId}\` | $${cost.toFixed(4)} |`);
+      }
+      lines.push(``);
+    }
+  }
+
+  // Generated Files
+  if (data.generatedFiles.length > 0) {
+    lines.push(`## Generated Files`, ``);
+
+    const byTask = new Map<string, string[]>();
+    for (const entry of data.generatedFiles) {
+      const files = byTask.get(entry.taskId) ?? [];
+      files.push(entry.filePath);
+      byTask.set(entry.taskId, files);
+    }
+
+    for (const [taskId, files] of byTask) {
+      lines.push(`### \`${taskId}\``, ``);
+      for (const file of files) {
+        lines.push(`- \`${file}\``);
+      }
+      lines.push(``);
+    }
+  }
+
+  // Output Files
+  lines.push(`## Output Files`, ``);
+  lines.push(`| File | Description |`, `|---|---|`);
+  if (data.assembledIndexPath) {
+    lines.push(`| \`docs/assembled-index.mts\` | Runnable Elysia app with all endpoint plugins wired |`);
+  }
+  if (data.documentationGenerated) {
+    lines.push(`| \`docs/hoppscotch-collection.json\` | API collection for Hoppscotch |`);
+  }
+  lines.push(`| \`plan.json\` | Task dependency graph |`);
+  lines.push(`| \`execution-summary.json\` | Task pass/fail status |`);
+  lines.push(`| \`integration-results.json\` | Integration test results |`);
+  lines.push(`| \`pipeline-result.json\` | Run metadata |`);
+  lines.push(`| \`logs/run.log\` | Full pipeline log |`);
+  lines.push(``);
+
+  return lines.join("\n");
+}
diff --git a/src/io/workspace.mts b/src/io/workspace.mts
index 0e3b81a..afaa021 100644
--- a/src/io/workspace.mts
+++ b/src/io/workspace.mts
@@ -113,4 +113,8 @@ export class Workspace {
   public taskQaKnowledgePath(taskId: string): string {
     return join(this.taskDir(taskId), `qa-knowledge.md`);
   }
+
+  public reportPath(): string { // Path of the Markdown run report: `report.md` joined onto this workspace's root.
+    return join(this.root, `report.md`);
+  }
 }
diff --git a/src/llm/anthropic-factory.mts b/src/llm/anthropic-factory.mts
index 361060e..300165f 100644
--- a/src/llm/anthropic-factory.mts
+++ b/src/llm/anthropic-factory.mts
@@ -1,11 +1,12 @@
 import { ChatAnthropic } from '@langchain/anthropic';
 import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
+import type { ILlmFactory } from '../interfaces/i-llm-factory.mjs';
 
 export interface AnthropicFactoryConfig {
   readonly apiKey: string;
 }
 
-export class AnthropicFactory {
+export class AnthropicFactory implements ILlmFactory {
 
   private readonly apiKey: string;
 
@@ -21,4 +22,8 @@ export class AnthropicFactory {
       maxTokens: 8192,
     });
   }
+
+  public createWithThinking(model: string, temperature: number): BaseChatModel { // Required by ILlmFactory.
+    return this.create(model, temperature); // No thinking-specific options are set here: identical to create().
+  }
 }
diff --git a/src/llm/cost-tracker.mts b/src/llm/cost-tracker.mts
new file mode 100644
index 0000000..c234ba1
--- /dev/null
+++ b/src/llm/cost-tracker.mts
@@ -0,0 +1,107 @@
+import type { Logger } from "winston";
+
+export interface LlmUsage { // One recorded LLM call.
+  readonly model: string;
+  readonly inputTokens: number;
+  readonly outputTokens: number;
+  readonly cost: number; // Dollars, as computed by calculateCost() when the call is recorded.
+  readonly taskId?: string; // Set when the call is attributed to a task; used for per-task cost totals.
+}
+
+// Cost per 1M tokens (input / output) as of 2026-04
+const MODEL_PRICING: Record<string, { input: number; output: number }> = {
+  // OpenAI
+  "gpt-5.4": { input: 2.50, output: 10.00 },
+  "gpt-4o": { input: 2.50, output: 10.00 },
+  "gpt-4o-mini": { input: 0.15, output: 0.60 },
+  // Anthropic
+  "claude-sonnet-4-6": { input: 3.00, output: 15.00 },
+  "claude-haiku-4-5": { input: 0.80, output: 4.00 },
+  "claude-opus-4-6": { input: 15.00, output: 75.00 },
+  // Ollama (local — free)
+  "qwen3.5:27b": { input: 0, output: 0 },
+  "qwen3-coder-next": { input: 0, output: 0 },
+};
+
+/**
+ * Resolves per-million-token prices for `model`. The LONGEST key that prefixes the model name
+ * wins (an exact name is its own longest prefix); unmatched models cost zero (assumed local).
+ */
+function lookupPricing(model: string): { input: number; output: number } {
+  let best: [string, { input: number; output: number }] = ["", { input: 0, output: 0 }];
+  for (const entry of Object.entries(MODEL_PRICING)) {
+    // Longest prefix wins so "gpt-4o-mini-<date>" is not billed at the "gpt-4o" rate.
+    if (model.startsWith(entry[0]) && entry[0].length > best[0].length) best = entry;
+  }
+  return best[1];
+}
+
+/** Dollar cost of one call: token counts weighted by the model's per-million-token prices. */
+export function calculateCost(model: string, inputTokens: number, outputTokens: number): number {
+  const { input, output } = lookupPricing(model);
+  return (inputTokens * input + outputTokens * output) / 1_000_000;
+}
+
+/** Collects the token usage and dollar cost of every LLM call recorded during a run. */
+export class CostTracker {
+
+  private readonly usages: LlmUsage[] = [];
+  private readonly logger: Logger;
+
+  constructor(logger: Logger) {
+    this.logger = logger;
+  }
+
+  /** Prices one call, stores it and returns the stored entry; calls that cost money are also logged at info level. */
+  public record(model: string, inputTokens: number, outputTokens: number, taskId?: string): LlmUsage {
+    const cost = calculateCost(model, inputTokens, outputTokens);
+    const entry: LlmUsage = { model, inputTokens, outputTokens, cost, taskId };
+    this.usages.push(entry);
+    // Zero-cost calls are stored but not logged.
+    if (cost > 0) {
+      const taskSuffix = taskId ? ` (task: ${taskId})` : "";
+      this.logger.info(`[cost] ${model}: ${inputTokens} in / ${outputTokens} out = $${cost.toFixed(4)}${taskSuffix}`);
+    }
+    return entry;
+  }
+
+  /** Summed cost of the calls recorded with exactly this `taskId`. */
+  public getTaskCost(taskId: string): number {
+    return this.usages.reduce((sum, entry) => (entry.taskId === taskId ? sum + entry.cost : sum), 0);
+  }
+
+  /** Summed cost of every recorded call. */
+  public getTotalCost(): number {
+    return this.usages.reduce((sum, { cost }) => sum + cost, 0);
+  }
+
+  /** Summed input tokens of every recorded call. */
+  public getTotalInputTokens(): number {
+    return this.usages.reduce((sum, { inputTokens }) => sum + inputTokens, 0);
+  }
+
+  /** Summed output tokens of every recorded call. */
+  public getTotalOutputTokens(): number {
+    return this.usages.reduce((sum, { outputTokens }) => sum + outputTokens, 0);
+  }
+
+  /** Number of calls recorded so far. */
+  public getCallCount(): number {
+    return this.usages.length;
+  }
+
+  /** Aggregate totals plus a shallow copy of the per-call entries. */
+  public getSummary(): CostSummary {
+    const totalInputTokens = this.getTotalInputTokens();
+    const totalOutputTokens = this.getTotalOutputTokens();
+    return { totalInputTokens, totalOutputTokens, totalCost: this.getTotalCost(), callCount: this.getCallCount(), usages: [...this.usages] };
+  }
+}
+
+export interface CostSummary { // Snapshot returned by CostTracker.getSummary().
+  readonly totalInputTokens: number;
+  readonly totalOutputTokens: number;
+  readonly totalCost: number; // Dollars, summed over all recorded calls.
+  readonly callCount: number;
+  readonly usages: readonly LlmUsage[]; // Copy of the per-call records, in recording order.
+}
diff --git a/src/llm/ollama-factory.mts b/src/llm/ollama-factory.mts
index 20ab5ec..6d133cd 100644
--- a/src/llm/ollama-factory.mts
+++ b/src/llm/ollama-factory.mts
@@ -1,4 +1,6 @@
 import { ChatOllama } from '@langchain/ollama';
+import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
+import type { ILlmFactory } from '../interfaces/i-llm-factory.mjs';
 
 export interface OllamaFactoryConfig {
   readonly host: string;
@@ -18,7 +20,7 @@ function createLongTimeoutFetch(timeoutMs: number): (input: string | URL | Reque
   };
 }
 
-export class OllamaFactory {
+export class OllamaFactory implements ILlmFactory {
 
   private readonly host: string;
   private readonly timeoutMs: number;
@@ -32,7 +34,7 @@ export class OllamaFactory {
       : {};
   }
 
-  public create(model: string, temperature: number): ChatOllama {
+  public create(model: string, temperature: number): BaseChatModel {
     return new ChatOllama({
       baseUrl: this.host,
       model,
@@ -46,7 +48,7 @@ export class OllamaFactory {
     } as ConstructorParameters<typeof ChatOllama>[0]);
   }
 
-  public createWithThinking(model: string, temperature: number): ChatOllama {
+  public createWithThinking(model: string, temperature: number): BaseChatModel {
     return new ChatOllama({
       baseUrl: this.host,
       model,
@@ -60,7 +62,7 @@ export class OllamaFactory {
     } as ConstructorParameters<typeof ChatOllama>[0]);
   }
 
-  public createWithJsonFormat(model: string, temperature: number): ChatOllama {
+  public createWithJsonFormat(model: string, temperature: number): BaseChatModel {
     return new ChatOllama({
       baseUrl: this.host,
       model,
diff --git a/src/llm/openai-factory.mts b/src/llm/openai-factory.mts
index f801558..125a41b 100644
--- a/src/llm/openai-factory.mts
+++ b/src/llm/openai-factory.mts
@@ -1,11 +1,12 @@
 import { ChatOpenAI } from '@langchain/openai';
 import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
+import type { ILlmFactory } from '../interfaces/i-llm-factory.mjs';
 
 export interface OpenAIFactoryConfig {
   readonly apiKey: string;
 }
 
-export class OpenAIFactory {
+export class OpenAIFactory implements ILlmFactory {
 
   private readonly apiKey: string;
 
@@ -21,4 +22,8 @@ export class OpenAIFactory {
       maxTokens: 16384,
     });
   }
+
+  public createWithThinking(model: string, temperature: number): BaseChatModel { // Required by ILlmFactory.
+    return this.create(model, temperature); // No thinking-specific options are set here: identical to create().
+  }
 }
diff --git a/src/llm/redact-secrets.mts b/src/llm/redact-secrets.mts
new file mode 100644
index 0000000..914afe2
--- /dev/null
+++ b/src/llm/redact-secrets.mts
@@ -0,0 +1,54 @@
+import winston from "winston";
+
+// Property names whose values are always replaced wholesale, whatever they contain.
+const SECRET_KEYS = new Set([
+  "ANTHROPIC_API_KEY", "OPENAI_API_KEY",
+  "OLLAMA_API_KEY", "LANGSMITH_API_KEY",
+  "apiKey", "api_key",
+  "anthropicApiKey", "openAIApiKey",
+]);
+
+// Key-shaped substrings scrubbed from any string value.
+const SECRET_PATTERNS = [
+  /sk-ant-[A-Za-z0-9_-]{20,}/g,
+  /sk-[A-Za-z0-9_-]{20,}/g,
+  /key-[A-Za-z0-9_-]{20,}/g,
+];
+
+/** Recursively redacts one value: strings are pattern-scrubbed, arrays and objects are walked, anything else passes through. */
+function redactValue(value: unknown): unknown {
+  if (typeof value === "string") {
+    return SECRET_PATTERNS.reduce((text, pattern) => text.replace(pattern, "[REDACTED]"), value);
+  }
+  if (Array.isArray(value)) {
+    return value.map((item) => redactValue(item));
+  }
+  if (value !== null && typeof value === "object") {
+    return redactObject(value as Record<string, unknown>);
+  }
+  return value;
+}
+
+/**
+ * Returns a redacted copy of `obj`: values under SECRET_KEYS become "[REDACTED]", every other
+ * value goes through redactValue. Symbol-keyed properties are copied (and redacted) too,
+ * because winston/logform keep a log entry's level, rendered message and splat arguments
+ * under Symbol.for("level" | "message" | "splat"); a string-keys-only copy would drop them.
+ */
+function redactObject(obj: Record<string, unknown>): Record<string, unknown> {
+  const source = obj as Record<PropertyKey, unknown>;
+  const result: Record<PropertyKey, unknown> = {};
+  // Object.keys keeps the original enumerable-string-key behaviour; symbols are appended.
+  for (const key of [...Object.keys(obj), ...Object.getOwnPropertySymbols(obj)]) {
+    const isSecret = typeof key === "string" && SECRET_KEYS.has(key);
+    result[key] = isSecret ? "[REDACTED]" : redactValue(source[key]);
+  }
+  return result;
+}
+
+/** Creates a winston format whose transform replaces each log entry with its redactObject() copy. */
+export function redactSecrets(): winston.Logform.Format {
+  const createFormat = winston.format((info) =>
+    redactObject(info as unknown as Record<string, unknown>) as unknown as winston.Logform.TransformableInfo);
+  return createFormat();
+}
diff --git a/src/llm/retry-with-backoff.mts b/src/llm/retry-with-backoff.mts
new file mode 100644
index 0000000..7dcb747
--- /dev/null
+++ b/src/llm/retry-with-backoff.mts
@@ -0,0 +1,57 @@
+import type { Logger } from "winston";
+import { classifyLlmError, type LlmError } from "../types/llm-errors.mts";
+
+export interface RetryConfig { // Tuning knobs for retryWithBackoff().
+  readonly maxRetries: number; // Retries after the first attempt (so up to maxRetries + 1 calls of fn).
+  readonly baseDelayMs: number; // Delay basis for the first retry; doubled for each later attempt.
+  readonly maxDelayMs: number; // Cap on the exponential part; random jitter is added on top of it.
+}
+
+const DEFAULT_CONFIG: RetryConfig = { // Used when retryWithBackoff() is called without a config argument.
+  maxRetries: 5,
+  baseDelayMs: 2000, // 2 s
+  maxDelayMs: 120000, // 2 min
+};
+
+/** Backoff before retry `attempt` (0-based): baseDelayMs × 2^attempt capped at maxDelayMs, plus up to 50% random jitter added after the cap. */
+function calculateDelay(attempt: number, config: RetryConfig): number {
+  const { baseDelayMs, maxDelayMs } = config;
+  const capped = Math.min(baseDelayMs * Math.pow(2, attempt), maxDelayMs);
+  return capped + Math.random() * capped * 0.5;
+}
+
+/**
+ * Calls `fn` until it resolves, retrying errors that classifyLlmError marks retryable at most
+ * `config.maxRetries` times. Waits the error's `retryAfterMs` for a 429 that carries one,
+ * otherwise calculateDelay(attempt); rethrows the classified error once retrying stops.
+ */
+export async function retryWithBackoff<T>(
+  fn: () => Promise<T>,
+  logger: Logger,
+  label: string,
+  config: RetryConfig = DEFAULT_CONFIG,
+): Promise<T> {
+  let lastError: LlmError | undefined;
+
+  for (let attempt = 0; attempt <= config.maxRetries; attempt += 1) {
+    try {
+      return await fn();
+    } catch (error) {
+      const failure = classifyLlmError(error);
+      lastError = failure;
+      if (!failure.retryable || attempt >= config.maxRetries) {
+        const reason = failure.retryable ? `exhausted ${config.maxRetries} retries` : "non-retryable error";
+        logger.error(`[retry] ${label}: ${reason} — ${failure.name}: ${failure.message}`);
+        throw failure;
+      }
+
+      const delayMs = failure.statusCode === 429 && "retryAfterMs" in failure && failure.retryAfterMs
+        ? (failure.retryAfterMs as number)
+        : calculateDelay(attempt, config);
+      logger.warn(`[retry] ${label}: ${failure.name} — retrying in ${Math.round(delayMs / 1000)}s (attempt ${attempt + 1}/${config.maxRetries})`);
+      await new Promise((resolve) => setTimeout(resolve, delayMs));
+    }
+  }
+
+  throw lastError ?? new Error(`${label}: retry loop exited unexpectedly`);
+}
diff --git a/src/llm/stream-invoke.mts b/src/llm/stream-invoke.mts
index 1a11140..ed4b8af 100644
--- a/src/llm/stream-invoke.mts
+++ b/src/llm/stream-invoke.mts
@@ -1,19 +1,43 @@
 import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
-import type { BaseMessage } from '@langchain/core/messages';
+import type { BaseMessage, AIMessageChunk } from '@langchain/core/messages';
+
+export interface StreamInvokeResult {
+  readonly content: string;
+  readonly inputTokens: number;
+  readonly outputTokens: number;
+}
 
 export async function streamInvoke(
   chatModel: BaseChatModel,
   messages: readonly BaseMessage[],
   traceConfig: Record<string, unknown>,
 ): Promise<string> {
+  const result = await streamInvokeWithUsage(chatModel, messages, traceConfig);
+  return result.content;
+}
+
+export async function streamInvokeWithUsage(
+  chatModel: BaseChatModel,
+  messages: readonly BaseMessage[],
+  traceConfig: Record<string, unknown>,
+): Promise<StreamInvokeResult> {
   const stream = await chatModel.stream([...messages], traceConfig);
   const chunks: string[] = [];
+  let inputTokens = 0;
+  let outputTokens = 0;
 
   for await (const chunk of stream) {
     const text = typeof chunk.content === 'string'
       ? chunk.content
       : JSON.stringify(chunk.content);
     chunks.push(text);
+
+    // Extract token usage from chunk metadata if available
+    const usage = (chunk as AIMessageChunk).usage_metadata;
+    if (usage) {
+      inputTokens = usage.input_tokens ?? inputTokens;
+      outputTokens = usage.output_tokens ?? outputTokens;
+    }
   }
 
   const raw = chunks.join('');
@@ -35,5 +59,5 @@ export async function streamInvoke(
     }
   }
 
-  return cleaned;
+  return { content: cleaned, inputTokens, outputTokens };
 }
diff --git a/src/orchestrator/fallback-fix-loop.mts b/src/orchestrator/fallback-fix-loop.mts
index 1587b59..3e6a8bc 100644
--- a/src/orchestrator/fallback-fix-loop.mts
+++ b/src/orchestrator/fallback-fix-loop.mts
@@ -4,7 +4,7 @@ import type { Task, TaskState } from '../types/task.mts';
 import type { Result } from '../types/result.mts';
 import type { AgentInput, AgentOutput } from '../types/agent-context.mts';
 import type { ModelChainConfig } from '../config/models.mts';
-import type { OllamaFactory } from '../llm/ollama-factory.mts';
+import type { ILlmFactory } from '../interfaces/i-llm-factory.mjs';
 import type { FallbackTier } from '../config/fallback-tiers.mts';
 import { CodegenAgent } from '../agents/codegen-agent.mts';
 import type { CodegenInput, CodegenOutput } from '../agents/codegen-agent.mts';
@@ -27,7 +27,7 @@ class FixedModelCodegenAgent extends CodegenAgent {
   constructor(
     fixedModel: BaseChatModel,
     fixedModelName: string,
-    llmFactory: OllamaFactory,
+    llmFactory: ILlmFactory,
     logger: Logger,
     timeoutMs?: number,
   ) {
@@ -74,9 +74,8 @@ export async function runFallbackFixLoop(
     logger.info(`[fallback] ═══ Escalating task ${task.id} to ${tier.name} (${tier.maxIterations} iterations) ═══`);
 
     const chatModel = tier.createChatModel();
-    // FixedModelCodegenAgent needs an OllamaFactory for the BaseAgent constructor,
+    // FixedModelCodegenAgent needs an ILlmFactory for the BaseAgent constructor,
     // but it won't use it since run() is overridden to use the fixed model directly.
-    // We pass a dummy factory — the localFactory from deps works fine.
     const fallbackAgent = new FixedModelCodegenAgent(
       chatModel,
       tier.model,
diff --git a/src/orchestrator/fix-loop.mts b/src/orchestrator/fix-loop.mts
index deaf368..3c0e780 100644
--- a/src/orchestrator/fix-loop.mts
+++ b/src/orchestrator/fix-loop.mts
@@ -8,7 +8,9 @@ import type { CodegenAgent, CodegenInput, CodeFile } from '../agents/codegen-age
 import type { EslintAgent } from '../agents/eslint-agent.mts';
 import type { QaAgent, QaInput } from '../agents/qa-agent.mts';
 import type { Workspace } from '../io/workspace.mts';
-import type { OllamaFactory } from '../llm/ollama-factory.mts';
+import type { ILlmFactory } from '../interfaces/i-llm-factory.mjs';
+import type { CostTracker } from '../llm/cost-tracker.mts';
+import { CostLimitExceededError } from '../types/llm-errors.mts';
 import { writeJson, writeCode, readAllCodeFiles } from '../io/file-protocol.mts';
 import { writeFile, mkdir, readFile } from 'node:fs/promises';
 import { CODEGEN_SYSTEM_PROMPT } from '../prompts/codegen-system-prompt.mts';
@@ -31,7 +33,9 @@ export interface FixLoopDeps {
   readonly qaAgent: QaAgent;
   readonly workspace: Workspace;
   readonly logger: Logger;
-  readonly dummyFactory?: OllamaFactory;
+  readonly dummyFactory?: ILlmFactory;
+  readonly costTracker?: CostTracker;
+  readonly taskCostLimit?: number;
 }
 
 export async function runFixLoop(
@@ -199,6 +203,33 @@ export async function runFixLoop(
     const codegenDurationMs = Math.round(performance.now() - codegenStartMs);
     logger.info(`[fix-loop] Step 1/3: CodeGen completed in ${codegenDurationMs}ms — ${allFiles.length} files (model: ${codegenResult.value.modelUsed})`);
 
+    // Record cost and check task cost ceiling
+    if (deps.costTracker) {
+      deps.costTracker.record(
+        codegenResult.value.modelUsed,
+        codegenResult.value.inputTokens,
+        codegenResult.value.outputTokens,
+        task.id,
+      );
+      const taskCost = deps.costTracker.getTaskCost(task.id);
+      const limit = deps.taskCostLimit ?? Infinity;
+      if (taskCost > limit) {
+        const costErr = new CostLimitExceededError(taskCost, limit);
+        logger.error(`[fix-loop] ${costErr.message}`);
+        await writeJson(workspace.taskStatusPath(task.id), {
+          status: 'failed',
+          iteration,
+          lastError: costErr.message,
+        });
+        return ok({
+          taskId: task.id,
+          status: 'failed',
+          iteration,
+          lastError: costErr.message,
+        });
+      }
+    }
+
     // Separate test files from code files BEFORE truncation so tests don't get cut
     const testFiles = allFiles.filter((f) => f.path.includes(`.test.mts`) || f.path.includes(`.test.ts`));
     let codeFiles: readonly CodeFile[] = allFiles.filter((f) => !f.path.includes(`.test.mts`) && !f.path.includes(`.test.ts`));
diff --git a/src/orchestrator/pipeline.mts b/src/orchestrator/pipeline.mts
index 11b6fed..0d078a0 100644
--- a/src/orchestrator/pipeline.mts
+++ b/src/orchestrator/pipeline.mts
@@ -16,7 +16,10 @@ import { executeGraph } from '../graph/parallel-executor.mts';
 import { runFixLoop } from './fix-loop.mts';
 import { runFallbackFixLoop } from './fallback-fix-loop.mts';
 import type { FallbackTier } from '../config/fallback-tiers.mts';
-import type { OllamaFactory } from '../llm/ollama-factory.mts';
+import type { ILlmFactory } from '../interfaces/i-llm-factory.mjs';
+import type { CostTracker } from '../llm/cost-tracker.mts';
+import { generateReport } from '../io/report-generator.mts';
+import type { GeneratedFileEntry } from '../io/report-generator.mts';
 import type { PlanningAgent } from '../agents/planning-agent.mts';
 import type { CodegenAgent } from '../agents/codegen-agent.mts';
 import type { EslintAgent } from '../agents/eslint-agent.mts';
@@ -44,7 +47,8 @@ export interface PipelineDeps {
   readonly documentationAgent: DocumentationAgent;
   readonly logger: Logger;
   readonly fallbackTiers?: readonly FallbackTier[];
-  readonly localFactory?: OllamaFactory;
+  readonly primaryFactory?: ILlmFactory;
+  readonly costTracker?: CostTracker;
 }
 
 export async function runPipeline(
@@ -186,7 +190,9 @@ export async function runPipeline(
         qaAgent: deps.qaAgent,
         workspace,
         logger,
-        dummyFactory: deps.localFactory,
+        dummyFactory: deps.primaryFactory,
+        costTracker: deps.costTracker,
+        taskCostLimit: config.taskCostLimit,
       };
       const fixConfig = { maxIterations: config.maxFixIterations, integrationPort: taskPort };
 
@@ -316,6 +322,53 @@ export async function runPipeline(
 
   logger.info(`Pipeline complete in ${durationMs}ms`);
 
+  // Phase 4: Generate run report
+  logger.info(`Phase 4: Generating run report`);
+
+  const generatedFiles: GeneratedFileEntry[] = [];
+  for (const task of taskGraph.tasks) {
+    const state = taskStates.get(task.id);
+    if (state?.status === 'completed') {
+      const codeDir = workspace.taskCodeDir(task.id);
+      const codeResult = await readAllCodeFiles(codeDir);
+      if (codeResult.ok) {
+        for (const [fileName] of codeResult.value) {
+          generatedFiles.push({ taskId: task.id, filePath: fileName });
+        }
+      }
+    }
+  }
+
+  // Check if assembled index was created
+  let assembledIndexPath: string | undefined;
+  try {
+    const assembledPath = `${workspace.docsDir()}/assembled-index.mts`;
+    await access(assembledPath);
+    assembledIndexPath = assembledPath;
+  } catch {
+    // No assembled index
+  }
+
+  const report = generateReport({
+    runId,
+    durationMs,
+    prdLength: prdText.length,
+    llmProvider: config.llmProvider,
+    llmProviderHost: config.llmProviderHost,
+    maxFixIterations: config.maxFixIterations,
+    maxConcurrency: config.maxConcurrency,
+    taskGraph,
+    taskStates: allStatesArr,
+    integrationResults,
+    documentationGenerated,
+    costSummary: deps.costTracker?.getSummary(),
+    generatedFiles,
+    assembledIndexPath,
+  });
+
+  await writeFile(workspace.reportPath(), report, 'utf-8');
+  logger.info(`Run report written to ${workspace.reportPath()}`);
+
   // Write final pipeline result
   await writeJson(`${workspace.root}/pipeline-result.json`, {
     runId,
diff --git a/src/types/agent-context.mts b/src/types/agent-context.mts
index 935811e..e57b760 100644
--- a/src/types/agent-context.mts
+++ b/src/types/agent-context.mts
@@ -11,5 +11,7 @@ export interface AgentOutput<T> {
   readonly payload: T;
   readonly modelUsed: string;
   readonly durationMs: number;
+  readonly inputTokens: number;
+  readonly outputTokens: number;
   readonly traceId?: string;
 }
diff --git a/src/types/llm-errors.mts b/src/types/llm-errors.mts
new file mode 100644
index 0000000..2e70a32
--- /dev/null
+++ b/src/types/llm-errors.mts
@@ -0,0 +1,89 @@
+/** Base error for LLM call failures; carries an optional numeric status code and a retryable flag. */
+export class LlmError extends Error {
+  public readonly statusCode?: number;
+  public readonly retryable: boolean;
+
+  constructor(message: string, statusCode?: number, retryable: boolean = false) {
+    super(message);
+    this.name = "LlmError";
+    this.statusCode = statusCode;
+    this.retryable = retryable;
+  }
+}
+
+/** Rate-limit failure: always status 429 and retryable; optionally records a retry delay in milliseconds. */
+export class RateLimitError extends LlmError {
+  public readonly retryAfterMs?: number;
+
+  constructor(message: string, retryAfterMs?: number) {
+    super(message, 429, true);
+    this.name = "RateLimitError";
+    this.retryAfterMs = retryAfterMs;
+  }
+}
+
+/** Context-window overflow: constructed with status 400 and marked non-retryable. */
+export class ContextWindowExceededError extends LlmError {
+  constructor(message: string) {
+    super(message, 400, false);
+    this.name = "ContextWindowExceededError";
+  }
+}
+
+/** Authentication failure: constructed with status 401 and marked non-retryable. */
+export class AuthenticationError extends LlmError {
+  constructor(message: string) {
+    super(message, 401, false);
+    this.name = "AuthenticationError";
+  }
+}
+
+/** Model/server unavailable: always marked retryable; the status code is whatever the caller supplies. */
+export class ModelUnavailableError extends LlmError {
+  constructor(message: string, statusCode?: number) {
+    super(message, statusCode, true);
+    this.name = "ModelUnavailableError";
+  }
+}
+
+/** Cost ceiling breach: non-retryable, no status code; keeps the accumulated cost and the limit it exceeded. */
+export class CostLimitExceededError extends LlmError {
+  public readonly accumulatedCost: number;
+  public readonly limit: number;
+
+  constructor(accumulatedCost: number, limit: number) {
+    super(`Cost limit exceeded: $${accumulatedCost.toFixed(4)} > $${limit.toFixed(2)} limit`, undefined, false);
+    this.name = "CostLimitExceededError";
+    this.accumulatedCost = accumulatedCost;
+    this.limit = limit;
+  }
+}
+
+/**
+ * Maps any thrown value to a typed LlmError by scanning its message (checked in order: rate limit,
+ * context window, auth, unavailable/5xx, else generic non-retryable). An existing LlmError is returned
+ * as-is so its subclass survives; a "retry after N" hint is read as N seconds. @returns the typed error.
+ */
+export function classifyLlmError(error: unknown): LlmError {
+  if (error instanceof LlmError) return error;
+  const msg = error instanceof Error ? error.message : String(error);
+  const msgLower = msg.toLowerCase();
+  // A code counts only when no digit touches it, so "4290 tokens" is not read as status 429.
+  const hasCode = (code: number): boolean => new RegExp(`(?:^|\\D)${code}(?:\\D|$)`).test(msg);
+  if (msgLower.includes("rate limit") || hasCode(429) || msgLower.includes("too many requests")) {
+    const retryMatch = msg.match(/retry.after[:\s]*(\d+)/i);
+    const retryAfterMs = retryMatch && retryMatch[1] ? parseInt(retryMatch[1], 10) * 1000 : undefined;
+    return new RateLimitError(msg, retryAfterMs);
+  }
+  if (msgLower.includes("context") && (msgLower.includes("length") || msgLower.includes("window") || msgLower.includes("exceeded"))) {
+    return new ContextWindowExceededError(msg);
+  }
+  if (hasCode(401) || msgLower.includes("unauthorized") || msgLower.includes("invalid api key") || msgLower.includes("authentication")) {
+    return new AuthenticationError(msg);
+  }
+  if (hasCode(503) || hasCode(502) || msgLower.includes("overloaded") || msgLower.includes("unavailable")) {
+    return new ModelUnavailableError(msg, 503);
+  }
+  if (hasCode(500) || msgLower.includes("internal server error")) return new ModelUnavailableError(msg, 500);
+  return new LlmError(msg, undefined, false);
+}
diff --git a/src/types/pipeline.mts b/src/types/pipeline.mts
index b31638e..512deb6 100644
--- a/src/types/pipeline.mts
+++ b/src/types/pipeline.mts
@@ -1,12 +1,15 @@
 import type { TaskState } from './task.mts';
+import type { LlmProvider } from '../config/env.mjs';
 
 export interface PipelineConfig {
   readonly maxFixIterations: number;
   readonly maxConcurrency: number;
   readonly workspaceDir: string;
-  readonly ollamaHost: string;
+  readonly llmProvider: LlmProvider; // selected LLM backend; replaces the Ollama-only host setting
+  readonly llmProviderHost?: string; // optional host for the provider — presumably only needed by self-hosted backends; confirm
   readonly maxTasks?: number;
   readonly integrationPort: number;
+  readonly taskCostLimit: number; // per-task cost ceiling — presumably USD (cf. TASK_COST_LIMIT); confirm
 }
 
 export interface PipelineResult {