-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathenv.sample
More file actions
68 lines (59 loc) · 3.78 KB
/
env.sample
File metadata and controls
68 lines (59 loc) · 3.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# Copy to .env and fill in your values.
# ── Provider ────────────────────────────────────────────────────────────────
# Which inference backend to use. Set PROVIDER to one of:
# openrouter — cloud inference via OpenRouter (requires API key)
# ollama — local inference via Ollama (no API key, no cost)
# openai — any OpenAI-compatible server: LM Studio, oMLX, vLLM, LocalAI, real OpenAI
PROVIDER=ollama
# ── OpenRouter (cloud) ──────────────────────────────────────────────────────
# API key — get one at https://openrouter.ai/keys
# The value below is a placeholder; replace it with your own key in .env.
OPENROUTER_API_KEY=sk-or-v1-REPLACE_ME
# Default cloud vision model. Options, grouped by cost tier:
# Cheap + strong: google/gemini-2.5-flash (~$0.001/run) | google/gemini-2.5-flash-lite
# Free tier: nvidia/nemotron-nano-12b-v2-vl:free | google/gemma-3-27b-it:free
# Expensive: google/gemini-2.5-pro | openai/gpt-4o (warns before running)
OPENROUTER_MODEL=google/gemini-2.5-flash
# ── Ollama (local) ──────────────────────────────────────────────────────────
# Ollama server URL (default: http://localhost:11434)
OLLAMA_HOST=http://localhost:11434
# Default local model — requires 8 GB+ RAM minimum.
# Each line below reads: model tag, approximate disk size / approximate RAM, notes.
# qwen2.5vl:3b ~3.2 GB disk / ~5 GB RAM ★ recommended — 125K ctx, strong JSON, multi-image
# qwen2.5vl:7b ~6.0 GB disk / ~9 GB RAM best quality, needs 10+ GB RAM
# minicpm-v:8b ~5.5 GB disk / ~8 GB RAM strong OCR + reasoning, needs 10+ GB RAM
# Install the default model with: ollama pull qwen2.5vl:3b
OLLAMA_MODEL=qwen2.5vl:3b
# ── OpenAI-compatible (LM Studio, oMLX, vLLM, LocalAI, real OpenAI) ─────────
# Base URL for the OpenAI-compatible server (no trailing slash)
# LM Studio default: http://localhost:1234/v1
# oMLX default: http://localhost:8000/v1
# vLLM default: http://localhost:8000/v1
OPENAI_BASE_URL=http://localhost:1234/v1
# API key — use "lm-studio" for LM Studio, "not-needed" for vLLM/oMLX, real key for OpenAI
# (write the value without the surrounding quotes, as on the line below)
OPENAI_API_KEY=lm-studio
# Model name exactly as shown in the server (required)
# This sample ships the value blank — there is no default here; fill it in.
# LM Studio example: qwen/qwen2.5-vl-7b-instruct
# oMLX example: mlx-community/Qwen2.5-VL-7B-Instruct-8bit
OPENAI_MODEL=
# Frame extraction knobs
# Each note sits on its own line above the key: not every env-file loader strips
# trailing "# ..." text, and some would read it as part of the value.
# Range 0..1; lower = more frames (hybrid: scene-change OR interval)
SCENE_THRESHOLD=0.1
# Seconds between forced frames (fallback; scene-change fires first)
MIN_INTERVAL=2
# HARD CAP on frames — prevents runaway API cost
MAX_FRAMES=60
# Frame width in px; higher = more detail, more tokens
FRAME_WIDTH=512
# ── Fallback (cross-provider or same-provider) ───────────────────────────────
# All four FALLBACK_* keys ship blank in this sample.
# FALLBACK_PROVIDER: if set to a different provider, switches provider on failure
# Same provider: leave blank (auto-detects installed models)
# Cross-provider: set to ollama | openai | openrouter
FALLBACK_PROVIDER=
# FALLBACK_MODEL: model to try when primary fails (required for cross-provider)
# Same-provider example: qwen2.5vl:3b | qwen/qwen2.5-vl-3b-instruct
# Cross-provider example: qwen2.5vl:7b (if FALLBACK_PROVIDER=ollama)
FALLBACK_MODEL=
# FALLBACK_BASE_URL: base URL for fallback provider (openai-compat or ollama)
# Defaults to OPENAI_BASE_URL or OLLAMA_HOST if not set
# NOTE(review): presumably the default is picked to match the fallback provider — confirm
FALLBACK_BASE_URL=
# FALLBACK_API_KEY: API key for fallback provider
# Defaults to OPENAI_API_KEY or OPENROUTER_API_KEY if not set
# NOTE(review): presumably the default is picked to match the fallback provider — confirm
FALLBACK_API_KEY=
# Safety / cost caps
# Each note sits on its own line above the key: not every env-file loader strips
# trailing "# ..." text, and some would read it as part of the value.
# Abort mid-run if estimated spend (USD) exceeds this
MAX_COST_USD=1.00
# 0 = auto (one request if possible)
BATCH_SIZE=0
# Per-API-call timeout, in seconds
REQUEST_TIMEOUT=600