-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbackends.py
More file actions
107 lines (87 loc) · 3.24 KB
/
backends.py
File metadata and controls
107 lines (87 loc) · 3.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
"""Backend abstraction for open-source LLM runtimes.
Every supported backend exposes an OpenAI-compatible HTTP surface. The
abstraction here normalises base URLs, auth headers, feature flags, and
model-listing behaviour so the gateway can target any of them uniformly.
Supported BACKEND_KIND values:
- ``vllm`` vLLM OpenAI server (default recommendation)
- ``ollama`` Ollama (expose /v1)
- ``llamacpp`` llama.cpp ``llama-server`` with --api
- ``tgi`` Hugging Face Text Generation Inference (OpenAI-compatible mode)
- ``sglang`` SGLang server in OpenAI-compatible mode
- ``localai`` LocalAI
- ``lmstudio`` LM Studio local server
- ``openai`` Generic OpenAI-compatible endpoint (any other project)
The gateway never assumes a single backend owns the feature set. Anything not
natively supported (e.g. embeddings on a runtime that lacks them) returns a
structured 501 so callers get a clean error instead of mystery failures.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from .config import Settings
@dataclass(frozen=True)
class BackendCapabilities:
    """Immutable feature flags describing what a backend offers.

    Every flag defaults to ``True``; pass ``flag=False`` for each feature
    that should be reported as unavailable.
    """

    # Field order is part of the positional constructor signature, so it
    # must stay stable.
    chat: bool = True
    completions: bool = True
    models: bool = True
    embeddings: bool = True
    streaming: bool = True
    tools: bool = True
@dataclass(frozen=True)
class BackendProfile:
    """Immutable description of one resolved backend.

    Holds the backend kind and display name, the base URL, the relative
    path of each endpoint, the capability flags, and any extra headers.
    """

    kind: str
    display_name: str
    base_url: str
    # Endpoint paths; each one starts with a slash.
    health_path: str = "/health"
    models_path: str = "/models"
    chat_path: str = "/chat/completions"
    completions_path: str = "/completions"
    embeddings_path: str = "/embeddings"
    capabilities: BackendCapabilities = field(default_factory=BackendCapabilities)
    extra_headers: dict[str, str] = field(default_factory=dict)

    @property
    def health_url(self) -> str:
        """Return the absolute URL built from ``base_url`` and ``health_path``.

        A trailing ``/v1`` segment on ``base_url`` is dropped before the
        health path is appended. Trailing slashes are stripped *before*
        that check, so ``http://host/v1/`` resolves to the same URL as
        ``http://host/v1`` even when the profile was constructed directly
        with an un-normalised base URL.
        """
        root = self.base_url.rstrip("/")
        if root.endswith("/v1"):
            root = root[: -len("/v1")]
        # Removing the suffix can expose another trailing slash
        # (e.g. "http://host//v1"), so strip once more before joining.
        return f"{root.rstrip('/')}{self.health_path}"
# Capability flags for each supported backend kind. Membership in this
# mapping is what makes a kind "known" to resolve_backend().
_PROFILES: dict[str, BackendCapabilities] = {
    # Backends using the default flag set.
    "vllm": BackendCapabilities(),
    "openai": BackendCapabilities(),
    # Backends whose flags are spelled out explicitly.
    "ollama": BackendCapabilities(embeddings=True),
    "llamacpp": BackendCapabilities(embeddings=True, tools=False),
    "tgi": BackendCapabilities(embeddings=False),
    # Remaining backends, again with the default flag set.
    "sglang": BackendCapabilities(),
    "localai": BackendCapabilities(),
    "lmstudio": BackendCapabilities(),
}
_DISPLAY: dict[str, str] = {
"vllm": "vLLM",
"openai": "OpenAI-compatible",
"ollama": "Ollama",
"llamacpp": "llama.cpp",
"tgi": "Text Generation Inference",
"sglang": "SGLang",
"localai": "LocalAI",
"lmstudio": "LM Studio",
}
_HEALTH_PATHS: dict[str, str] = {
"ollama": "/api/tags",
"llamacpp": "/health",
"tgi": "/health",
"lmstudio": "/v1/models",
}
def resolve_backend(settings: Settings) -> BackendProfile:
    """Build the ``BackendProfile`` described by ``settings``.

    Args:
        settings: Object providing ``backend_kind``, ``backend_base_url``
            and ``backend_api_key``.

    Returns:
        A profile whose ``kind`` is always a key of ``_PROFILES``. An
        unset or unrecognised ``backend_kind`` resolves to ``"openai"``.
    """
    # Normalise both case and surrounding whitespace so a padded value
    # such as "ollama " still matches its profile instead of silently
    # falling back to the generic one.
    kind = (settings.backend_kind or "openai").strip().lower()
    if kind not in _PROFILES:
        kind = "openai"

    # Only send an Authorization header when an API key is configured.
    extra_headers: dict[str, str] = {}
    if settings.backend_api_key:
        extra_headers["Authorization"] = f"Bearer {settings.backend_api_key}"

    return BackendProfile(
        kind=kind,
        display_name=_DISPLAY.get(kind, kind),
        # Stored without trailing slashes.
        base_url=settings.backend_base_url.rstrip("/"),
        health_path=_HEALTH_PATHS.get(kind, "/health"),
        capabilities=_PROFILES[kind],
        extra_headers=extra_headers,
    )