# fortemi/.env.example (Fortemi/fortemi, main branch)

# =============================================================================
# Matric Memory - Environment Variables
# =============================================================================
# Copy this file to .env and configure for your environment.
# Uncommented variables are set to their default values; commented-out
# variables are optional and show their default (or an example) value.
#
# For Docker bundle deployment, only the variables under "Docker Bundle
# Deployment" are needed. The compose file sets sensible defaults for
# everything else.
#
# For standalone (non-Docker) deployment, configure all sections as needed.

# =============================================================================
# Docker Bundle Deployment
# =============================================================================

# Hardware profile — controls which sidecars run and how they use GPU.
# COMPOSE_PROFILES is read by Docker Compose itself to decide which service
# profiles are started; it only has an effect for the Docker bundle deployment.
# Options:
#   edge     (default) — CPU sidecars, 6-8GB VRAM (RTX 3060/4060/5060)
#   gpu-12gb           — GPU sidecars, 12-16GB VRAM (RTX 3060 12GB/4070/5070)
#   gpu-24gb           — GPU sidecars, 24GB+ VRAM (RTX 3090/4090/5090)
COMPOSE_PROFILES=edge

# Container registry (default: ghcr.io for public images)
# Internal development: set to git.integrolabs.net
# FORTEMI_REGISTRY=ghcr.io

# Image tag to pull (default: bundle-latest)
# FORTEMI_TAG=bundle-latest

# External URL (REQUIRED for OAuth, MCP authentication, and public-facing links)
# This URL appears in upload guidance, export links, and OAuth metadata.
# ISSUER_URL=https://memory.example.com

# Internal API URL for MCP server → API communication (default: http://localhost:3000)
# In the Docker bundle, MCP and API run in the same container, so localhost is correct.
# Only change this for split deployments where MCP and API are separate containers.
# FORTEMI_URL=http://localhost:3000

# MCP OAuth credentials for token introspection
# Auto-managed: The bundle entrypoint auto-registers an MCP OAuth client on startup
# if credentials are missing or invalid (e.g., after a clean deploy with `down -v`).
# Credentials are persisted on the pgdata volume and reused across restarts.
# You only need to set these manually for standalone (non-Docker) deployments.
# MCP_CLIENT_ID=mm_xxx
# MCP_CLIENT_SECRET=xxx

# =============================================================================
# Database
# =============================================================================
# For standalone deployment only (Docker bundle uses POSTGRES_* vars)
# Format: postgres://USER:PASSWORD@HOST:PORT/DBNAME
# The matric:matric credentials below are example values — replace them with
# your own before running anywhere other than a local development machine.
DATABASE_URL=postgres://matric:matric@localhost:5432/matric

# =============================================================================
# Authentication (ADR-094 — fail-closed default)
# =============================================================================
# Default is REQUIRE_AUTH=true. Every /api/v1/* endpoint requires a valid
# Bearer token (OAuth2 access token or API key). This is the safe default
# for any deployment reachable beyond localhost.
# REQUIRE_AUTH=true

# To run anonymous (single-user desktop sidecar, local dev), BOTH must be set:
#   REQUIRE_AUTH=false
#   I_UNDERSTAND_NO_AUTH=true
# The startup gate refuses to launch with REQUIRE_AUTH=false unless
# I_UNDERSTAND_NO_AUTH=true is also present. A loud warning is emitted at
# startup and every 60 seconds for the process lifetime in anonymous mode.
# I_UNDERSTAND_NO_AUTH=false

# Multi-tenant deployments MUST run with authentication. Setting both
# FORTEMI_MULTI_TENANT=true and REQUIRE_AUTH=false is a startup error.
# FORTEMI_MULTI_TENANT=false

# OAuth access token lifetime in seconds (default: 3600 = 1 hour)
# Shorter = more secure, longer = less re-authentication friction
# OAUTH_TOKEN_LIFETIME_SECS=3600

# MCP token lifetime in seconds (default: 14400 = 4 hours)
# MCP sessions are interactive — shorter tokens cause mid-session disconnects
# OAUTH_MCP_TOKEN_LIFETIME_SECS=14400

# =============================================================================
# API Server
# =============================================================================
# Interface address for the API server. 0.0.0.0 is the IPv4 wildcard (all
# interfaces); use 127.0.0.1 if the API must not be reachable from other hosts.
HOST=0.0.0.0
# TCP port for the API. The http://localhost:3000 URLs used elsewhere in this
# file (e.g. FORTEMI_URL) assume this value.
PORT=3000
# Log verbosity. NOTE(review): presumably the conventional Rust RUST_LOG filter
# syntax (a level such as info/debug, or per-target directives) — confirm
# against the server's logging setup.
RUST_LOG=info

# CORS - comma-separated list of allowed origins
# ALLOWED_ORIGINS=https://memory.example.com,http://localhost:3000

# Maximum request body size in bytes (default: 2 GiB = 2147483648, sized for database backups)
# MATRIC_MAX_BODY_SIZE_BYTES=2147483648

# Maximum file upload size in bytes (default: 50 MiB = 52428800)
# MATRIC_MAX_UPLOAD_SIZE_BYTES=52428800

# =============================================================================
# Rate Limiting
# =============================================================================
# RATE_LIMIT_ENABLED=true
# RATE_LIMIT_REQUESTS=100
# RATE_LIMIT_PERIOD_SECS=60

# =============================================================================
# Logging
# =============================================================================
# LOG_FORMAT=json
# LOG_FILE=/var/log/matric/api.log
# LOG_ANSI=false

# =============================================================================
# Background Worker
# =============================================================================
# WORKER_ENABLED=true
# JOB_WORKER_ENABLED=true
# WORKER_THREADS=4
# JOB_POLL_INTERVAL_MS=60000
# JOB_MAX_CONCURRENT=4

# =============================================================================
# Chat (Synchronous LLM Conversation)
# =============================================================================
# Maximum concurrent chat requests hitting the GPU (default: 1).
# Chat bypasses the job queue and calls Ollama directly. This semaphore
# prevents VRAM contention. When all permits are in use, new requests get 503.
# Increase if you have enough VRAM for parallel inference.
# CHAT_MAX_CONCURRENT=1

# =============================================================================
# File Storage
# =============================================================================
# FILE_STORAGE_PATH=/var/lib/matric/files

# =============================================================================
# Real-Time Events
# =============================================================================
# MATRIC_EVENT_BUS_CAPACITY=256
# SSE_REPLAY_BUFFER_SIZE=1024
# SSE_COALESCE_WINDOW_MS=500
# MATRIC_WEBHOOK_TIMEOUT_SECS=10

# =============================================================================
# Real-Time ASR (Deepgram)
# =============================================================================
# Deepgram is the default production streaming ASR backend for realtime calls.
# Prefer DEEPGRAM_API_KEY_FILE for deployments managed by secret mounts; never
# put API keys in command-line arguments or committed config.
# DEEPGRAM_API_KEY=
# DEEPGRAM_API_KEY_FILE=/run/secrets/deepgram_api_key
# DEEPGRAM_MODEL=nova-3
# DEEPGRAM_LANGUAGE=en
# DEEPGRAM_ENCODING=linear16
# DEEPGRAM_SAMPLE_RATE_HZ=16000
# REALTIME_ASR_BACKEND_FALLBACK=

# =============================================================================
# Full-Text Search
# =============================================================================
# FTS_SCRIPT_DETECTION=true
# FTS_TRIGRAM_FALLBACK=true
# FTS_BIGRAM_CJK=true
# FTS_MULTILINGUAL_CONFIGS=true
# FTS_WEBSEARCH_TO_TSQUERY=true

# =============================================================================
# Redis Cache
# =============================================================================
# REDIS_ENABLED=true
# REDIS_URL=redis://localhost:6379
# REDIS_CACHE_TTL=300

# =============================================================================
# Backup
# =============================================================================
# BACKUP_DEST=/var/backups/matric-memory
# BACKUP_SCRIPT_PATH=/usr/local/bin/backup-matric.sh

# =============================================================================
# Inference Provider Selection
# =============================================================================
# Default provider for generation + embedding calls. One of:
#   ollama, openai, openrouter, llamacpp
# (See "Bring Your Own LLM" in README for the full per-profile recipe.)
# MATRIC_INFERENCE_DEFAULT=ollama

# Independent embedding-route override. Set to a provider id to route
# embedding calls separately from chat. Useful when the chat provider
# doesn't expose embeddings (e.g. OpenRouter):
#
#   MATRIC_INFERENCE_DEFAULT=openrouter
#   MATRIC_EMBEDDING_PROVIDER=ollama
#
# Must point at a registered provider with the Embedding capability.
# Misconfigurations log a warning at boot and fall back to the default.
# MATRIC_EMBEDDING_PROVIDER=ollama

# =============================================================================
# Ollama (local LLM)
# =============================================================================
# OLLAMA_BASE=http://localhost:11434
# OLLAMA_HOST=http://localhost:11434
# OLLAMA_EMBED_MODEL=nomic-embed-text
# OLLAMA_GEN_MODEL=qwen3.5:9b
# OLLAMA_EMBED_DIM=768
# MATRIC_OLLAMA_URL=http://localhost:11434
# MATRIC_OLLAMA_EMBEDDING_MODEL=nomic-embed-text
# MATRIC_OLLAMA_GENERATION_MODEL=qwen3.5:9b
# MATRIC_EMBED_TIMEOUT_SECS=30
# MATRIC_GEN_TIMEOUT_SECS=120

# =============================================================================
# OpenAI (alternative to Ollama)
# =============================================================================
# OPENAI_BASE_URL=https://api.openai.com/v1
# OPENAI_API_KEY=sk-xxx
# OPENAI_EMBED_MODEL=text-embedding-3-small
# OPENAI_GEN_MODEL=gpt-4o-mini
# OPENAI_EMBED_DIM=1536
# OPENAI_TIMEOUT=30
# OPENAI_SKIP_TLS_VERIFY=false
# OPENAI_HTTP_REFERER=https://memory.example.com
# OPENAI_X_TITLE=Matric Memory
# MATRIC_OPENAI_URL=https://api.openai.com/v1
# MATRIC_OPENAI_API_KEY=sk-xxx
# MATRIC_OPENAI_EMBEDDING_MODEL=text-embedding-3-small
# MATRIC_OPENAI_GENERATION_MODEL=gpt-4o-mini

# =============================================================================
# Fast Model (extraction pipeline)
# =============================================================================
# Small model for concept tagging, reference extraction, and title generation.
# Default: qwen3.5:9b. Set to empty to disable.
# Large documents are automatically chunked for the fast model.
# On failure, escalates to the standard model (OLLAMA_GEN_MODEL).
# MATRIC_FAST_GEN_MODEL=qwen3.5:9b
# MATRIC_FAST_GEN_TIMEOUT_SECS=60

# =============================================================================
# Extraction Services
# =============================================================================
# Extraction cascade: GLiNER → fast model → standard model (failover).
# GLiNER and Whisper are enabled by default in the Docker bundle.

# GLiNER NER sidecar (zero-shot named entity recognition)
# 0.5B BERT model, CPU-only, <300ms per document, 100-200x faster than LLM.
# Enabled by default in Docker bundle. Set to empty to disable.
# GLINER_BASE_URL=http://gliner:8090
# GLINER_MODEL=urchade/gliner_large-v2.1
# GLINER_THRESHOLD=0.3

# Target number of concepts per note (default: 5).
# GLiNER runs first; if it produces fewer than this, LLM supplements.
# Higher = richer taxonomy but slower (more LLM calls).
# EXTRACTION_TARGET_CONCEPTS=5

# Maximum document frequency ratio for concepts in embedding enrichment (#475).
# Concepts appearing in more than this fraction of notes are excluded as "stopwords".
# Lower = more aggressive filtering. Range: 0.01-1.0 (default: 0.8).
# EMBED_CONCEPT_MAX_DOC_FREQ=0.8

# Instruction prefix for embedding model (#472).
# nomic-embed-text supports: "clustering: ", "search_document: ", "classification: ".
# "clustering: " maximizes inter-cluster distance for graph linking.
# Set to empty string to disable prefix.
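# NOTE: the documented prefixes end with a space. Dotenv loaders commonly trim
# unquoted values, so if the trailing space matters in your deployment, quote
# the value (e.g. EMBED_INSTRUCTION_PREFIX="clustering: ") — confirm how your
# loader treats quotes before relying on this.
# EMBED_INSTRUCTION_PREFIX=clustering: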

# Vision model for image extraction
# qwen3.5:9b is natively multimodal (unified generation and vision); also used as fast gen model
# Requires Ollama with vision model pulled (e.g., qwen3.5:9b)
# OLLAMA_VISION_MODEL=qwen3.5:9b

# Whisper transcription service
# Deploy via docker-compose.whisper.yml
# WHISPER_BASE_URL=http://host.docker.internal:8000
# WHISPER_MODEL=Systran/faster-distil-whisper-large-v3

# Speaker diarization (pyannote sidecar). Identifies who speaks when in
# multi-speaker audio/video. Set to empty to disable. Requires HF_TOKEN
# for gated pyannote models on first download.
# DIARIZATION_BASE_URL=http://pyannote:8001
# DIARIZATION_MODEL=pyannote/speaker-diarization-3.1
# HF_TOKEN=hf_your_token_here

# Three.js renderer for 3D model (GLB) extraction.
# Docker bundle includes renderer at localhost:8080. Set for external renderer.
# RENDERER_URL=http://localhost:8080

# OCR and document processing
# OCR_ENABLED=false
# LIBREOFFICE_PATH=/usr/bin/libreoffice

# =============================================================================
# Graph Linking
# =============================================================================
# HNSW-based graph linking configuration. These tune the knowledge graph
# structure and edge weight normalization (#470, #481).
# All graph vars are read at job execution time — no restart required.

# Linking strategy: hnsw_heuristic (default) or threshold (legacy).
# hnsw_heuristic: HNSW Algorithm 4 diverse neighbor selection.
# threshold: legacy epsilon-threshold (creates star topologies on clustered data).
# GRAPH_LINKING_STRATEGY=hnsw_heuristic

# Maximum neighbors per node (M in HNSW). 0 = adaptive (default).
# Adaptive: k = log2(N) clamped to [5, 15]. Set to explicit value (e.g., 8) to override.
# GRAPH_K_NEIGHBORS=0

# Absolute similarity floor — no links below this (default: 0.5).
# Range: 0.0-1.0.
# GRAPH_MIN_SIMILARITY=0.5

# Extend candidate set with neighbors-of-neighbors (Algorithm 4 option, default: false).
# Increases recall at the cost of more comparisons.
# GRAPH_EXTEND_CANDIDATES=false

# Normalization gamma exponent for edge weights (#470).
# 1.0 = linear (default). >1.0 amplifies top-end differences. <1.0 compresses them.
# Range: 0.1-5.0 (default: 1.0 = linear rescaling).
# GRAPH_NORMALIZATION_GAMMA=1.0

# Keep pruned candidates in HNSW neighbor selection (default: false).
# GRAPH_KEEP_PRUNED=false

# Weight for SKOS tag overlap in blended linking score (default: 0.3).
# blended = (embedding_sim * (1 - w)) + (tag_overlap * w).
# Set to 0.0 to disable tag-based boost. Range: 0.0-1.0.
# GRAPH_TAG_BOOST_WEIGHT=0.3

# PFNET q parameter for graph sparsification (#476).
# q=2 (default) is equivalent to Relative Neighborhood Graph.
# Higher q produces sparser graphs approaching MST.
# Range: 2-10 (default: 2).
# GRAPH_PFNET_Q=2

# Louvain community resolution parameter (#473).
# Higher = more, smaller communities. Lower = fewer, larger communities.
# Range: 0.1-10.0 (default: 1.0 = standard modularity).
# GRAPH_COMMUNITY_RESOLUTION=1.0

# SNN (Shared Nearest Neighbor) pruning threshold (#474).
# Edges with SNN score below this are pruned during recompute_snn_scores.
# SNN(A,B) = |kNN(A) ∩ kNN(B)| / k. Range: 0.0-1.0 (default: 0.10).
# GRAPH_SNN_THRESHOLD=0.10

# Structural collection edge score (#480).
# Controls the "gravity well" strength for same-collection edges in graph exploration.
# Before normalization: 0.5 is weaker than all semantic edges (0.70-0.94).
# After normalization: 0.5 is at median of [0.0, 1.0] range.
# Range: 0.0-1.0 (default: 0.5).
# GRAPH_STRUCTURAL_SCORE=0.5

# =============================================================================
# Support Memory Archive (fortemi-docs)
# =============================================================================
# Bundles the Fortemi documentation as a searchable in-product knowledge
# base — same content as the docs site, queryable via the same notes/search
# API as your own data. Off by default so the Docker bundle matches the
# native build (which never auto-seeds).
#
# Two ways to opt in:
#
# 1. Auto-seed on first boot — set this in .env BEFORE running `up`:
#      LOAD_SUPPORT_MEMORY=true
#
# 2. Manual seed at any time on a running instance — single command:
#      docker compose -f docker-compose.bundle.yml \
#        exec fortemi /app/seed-support-archive.sh
#    Idempotent — re-running is a no-op once seeded.
#
# Once seeded the archive lives at memory `fortemi-docs`. Reach it via:
#      curl -H 'X-Fortemi-Memory: fortemi-docs' \
#        'http://localhost:3000/api/v1/search?q=hybrid+search'
#
# By default only full-text search is populated — semantic search over
# the archive needs an additional reprocess step. See README "Resource
# Requirements" for the opt-in command.
# LOAD_SUPPORT_MEMORY=false

# Legacy hard-skip kept for back-compat with bundles that pre-date the
# opt-in flip. Setting this to true forces skip even if
# LOAD_SUPPORT_MEMORY=true. New deployments don't need this.
# DISABLE_SUPPORT_MEMORY=false

# =============================================================================
# Multi-Memory Architecture
# =============================================================================
# Maximum concurrent live memories (default: 10). Scale with hardware.
# MAX_MEMORIES=10

# Cache TTL in seconds for default archive lookup (default: 60).
# Reduces database calls for high-traffic deployments.
# DEFAULT_ARCHIVE_CACHE_TTL=60

# =============================================================================
# OpenRouter (alternative LLM provider — generation only, no embeddings)
# =============================================================================
# Opt-in via OPENROUTER_API_KEY. Enables OpenRouter as a peer provider for
# generation tasks (100+ models via a single API). Embeddings are NOT
# supported by OpenRouter; pair with MATRIC_EMBEDDING_PROVIDER above to
# embed locally.
# OPENROUTER_API_KEY=sk-or-v1-xxx
# OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
# OPENROUTER_GEN_MODEL=anthropic/claude-sonnet-4
# OPENROUTER_TIMEOUT=300
#
# OpenRouter routing rules + analytics use these headers. Fortemi defaults
# them to https://fortemi.io / Fortemi; override for downstream apps that
# ship Fortemi as a sidecar so usage attributes correctly.
# OPENROUTER_HTTP_REFERER=https://your-app.example.com
# OPENROUTER_APP_NAME=Your App

# =============================================================================
# llama.cpp (self-hosted, OpenAI-compatible protocol)
# =============================================================================
# Opt-in by setting LLAMACPP_BASE_URL. llama-server (the HTTP server bundled
# with llama.cpp) speaks the OpenAI-compatible protocol on /v1/*. Useful for
# operators who want full local inference without the Ollama daemon.
# LLAMACPP_BASE_URL=http://localhost:8080/v1
# API key — only needed if llama-server was launched with --api-key.
# LLAMACPP_API_KEY=
# Generation model — required; must match --alias on llama-server.
# LLAMACPP_GEN_MODEL=
# Embedding model — optional; only if your build supports embeddings.
# LLAMACPP_EMBED_MODEL=
# LLAMACPP_TIMEOUT=300

# =============================================================================
# MCP Server (standalone deployment)
# =============================================================================
# MCP_TRANSPORT=http
# MCP_PORT=3001
# MCP_BASE_URL=http://localhost:3000/mcp
# MCP_BASE_PATH=/mcp
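# FORTEMI_URL is the same variable documented under "Docker Bundle Deployment";
# define it only once in your .env (duplicate keys are resolved differently by
# different loaders).
# FORTEMI_URL=http://localhost:3000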
# FORTEMI_API_KEY=