-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path.env.example
More file actions
65 lines (51 loc) · 1.65 KB
/
.env.example
File metadata and controls
65 lines (51 loc) · 1.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# -----------------------------------------------------------------------------
# Self-hosted chat API configuration.
# Copy to .env and adjust. BACKEND_KIND must match the compose profile you run.
# -----------------------------------------------------------------------------
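# --- Gateway ---
# NOTE(review): 127.0.0.1 accepts loopback connections only. If the gateway
# runs inside a container, it likely needs 0.0.0.0 to be reachable through a
# published port -- confirm against the compose file.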
API_HOST=127.0.0.1
API_PORT=8000
# Comma-separated API keys. The value below is a placeholder: replace it with
# your own secret key(s) before deploying. At least one key is required for
# production. Leave blank only for isolated private testing on an unreachable
# network.
API_KEYS=change-me-now
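# CORS. "*" allows requests from any origin; in production, replace it with
# the explicit origin(s) that should be allowed to call the API.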
CORS_ORIGINS=*
CORS_ALLOW_CREDENTIALS=false
# Rate limiting (simple in-process token bucket; put a real limiter in front
# for multi-replica deployments).
RATE_LIMIT_ENABLED=false
RATE_LIMIT_RPM=120
RATE_LIMIT_BURST=30
# Timeouts, in seconds (the _S suffix).
REQUEST_TIMEOUT_S=600
CONNECT_TIMEOUT_S=10
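# Logging. Keep LOG_PROMPTS=false outside of debugging: prompts may contain
# sensitive user data (presumably this flag writes prompt text to the logs --
# confirm against the gateway code).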
LOG_LEVEL=INFO
LOG_JSON=true
LOG_PROMPTS=false
# Metrics endpoint on /metrics (Prometheus text format).
METRICS_ENABLED=true
# --- Backend selection ---
# Supported: vllm | ollama | llamacpp | tgi | sglang | localai | lmstudio | openai
BACKEND_KIND=vllm
BACKEND_BASE_URL=http://vllm:8001/v1
# Set only if the backend requires its own API key (llama.cpp --api-key, etc.)
BACKEND_API_KEY=
# Default model displayed in docs and health output.
MODEL_NAME=Qwen/Qwen2.5-7B-Instruct
# --- vLLM-specific ---
VLLM_GPU_MEMORY_UTILIZATION=0.92
VLLM_MAX_MODEL_LEN=16384
VLLM_DTYPE=half
# --- llama.cpp-specific ---
LLAMACPP_MODEL_FILE=model.gguf
LLAMACPP_NGL=999
LLAMACPP_CTX=8192
LLAMACPP_PARALLEL=2
# --- TGI-specific ---
TGI_MAX_INPUT=8192
TGI_MAX_TOTAL=16384
# --- SGLang-specific ---
SGLANG_CTX=16384
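# --- Hugging Face gated model downloads ---
# Access token for gated or private models; leave blank for public models.
# NOTE(review): the huggingface_hub library itself reads HF_TOKEN (legacy name:
# HUGGING_FACE_HUB_TOKEN). Confirm the compose file maps this variable to one
# of those names inside the backend container.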
HUGGINGFACE_HUB_TOKEN=