# selfhosted-chat-api/api/app/backends.py (branch: main, repository: varad-more/selfhosted-chat-api)

# (Line-number gutter 1-107 left over from the web-page extraction removed.)
"""Backend abstraction for open-source LLM runtimes.

Every supported backend exposes an OpenAI-compatible HTTP surface. The
abstraction here normalises base URLs, auth headers, feature flags, and
model-listing behaviour so the gateway can target any of them uniformly.

Supported BACKEND_KIND values:

- ``vllm``      vLLM OpenAI server (default recommendation)
- ``ollama``    Ollama (expose /v1)
- ``llamacpp``  llama.cpp ``llama-server`` with --api
- ``tgi``       Hugging Face Text Generation Inference (OpenAI-compatible mode)
- ``sglang``    SGLang server in OpenAI-compatible mode
- ``localai``   LocalAI
- ``lmstudio``  LM Studio local server
- ``openai``    Generic OpenAI-compatible endpoint (any other project)

The gateway never assumes a single backend owns the feature set. Anything not
natively supported (e.g. embeddings on a runtime that lacks them) returns a
structured 501 so callers get a clean error instead of mystery failures.
"""

from __future__ import annotations

from dataclasses import dataclass, field

from .config import Settings


@dataclass(frozen=True)
class BackendCapabilities:
    """Immutable set of boolean feature flags for one backend runtime.

    Every flag defaults to ``True``, so a caller only has to spell out the
    features a runtime lacks, e.g. ``BackendCapabilities(tools=False)``.
    The class only stores the flags; nothing here acts on them.

    NOTE(review): the flag names line up with the endpoint families of the
    OpenAI-compatible surface (chat, completions, models, embeddings) plus
    streaming and tool calling -- confirm the exact meaning of each flag
    against the gateway code that reads them.
    """

    # Field order is part of the positional constructor signature; keep it.
    chat: bool = True
    completions: bool = True
    models: bool = True
    embeddings: bool = True
    streaming: bool = True
    tools: bool = True


@dataclass(frozen=True)
class BackendProfile:
    """Immutable description of one configured backend.

    Attributes:
        kind: Backend identifier such as ``"vllm"`` or ``"ollama"``.
        display_name: Human-readable name for the backend.
        base_url: Root URL of the backend API. It may or may not end in a
            ``/v1`` segment and/or a trailing slash; ``health_url`` copes
            with all of those forms.
        health_path: Path of the health probe, appended to the server root
            (``base_url`` without its ``/v1`` suffix).
        models_path: Relative path of the model-listing endpoint.
        chat_path: Relative path of the chat-completions endpoint.
        completions_path: Relative path of the text-completions endpoint.
        embeddings_path: Relative path of the embeddings endpoint.
        capabilities: Feature flags for this backend; a fresh
            ``BackendCapabilities()`` (everything enabled) when omitted.
        extra_headers: Additional HTTP headers for requests to the backend;
            an empty dict when omitted.
    """

    kind: str
    display_name: str
    base_url: str
    health_path: str = "/health"
    models_path: str = "/models"
    chat_path: str = "/chat/completions"
    completions_path: str = "/completions"
    embeddings_path: str = "/embeddings"
    # default_factory gives each profile its own default objects instead of
    # sharing one instance across every profile.
    capabilities: BackendCapabilities = field(default_factory=BackendCapabilities)
    extra_headers: dict[str, str] = field(default_factory=dict)

    @property
    def health_url(self) -> str:
        """Return the absolute URL of the backend's health probe.

        The health path is resolved against the server root rather than the
        API root, so a trailing ``/v1`` segment on ``base_url`` is dropped
        before ``health_path`` is appended.

        Trailing slashes are removed *before* looking for the ``/v1`` suffix,
        so ``http://host/v1`` and ``http://host/v1/`` both resolve to
        ``http://host/health`` (with the default ``health_path``).
        """
        api_suffix = "/v1"
        # Normalise first: otherwise "…/v1/" would not match the suffix check
        # and the probe would wrongly target "…/v1/health".
        root = self.base_url.rstrip("/")
        if root.endswith(api_suffix):
            root = root[: -len(api_suffix)].rstrip("/")
        return f"{root}{self.health_path}"


# Capability flags for every recognised backend kind.
#
# Despite the name, the values are BackendCapabilities objects, not
# BackendProfile objects. The key set doubles as the allow-list of kinds:
# resolve_backend() falls back to "openai" for any kind that is not a key here.
#
# BackendCapabilities() enables every feature, so only the ``False`` arguments
# change anything; the ``embeddings=True`` arguments restate the default.
_PROFILES: dict[str, BackendCapabilities] = {
    "vllm": BackendCapabilities(),
    "openai": BackendCapabilities(),
    "ollama": BackendCapabilities(embeddings=True),
    "llamacpp": BackendCapabilities(embeddings=True, tools=False),
    "tgi": BackendCapabilities(embeddings=False),
    "sglang": BackendCapabilities(),
    "localai": BackendCapabilities(),
    "lmstudio": BackendCapabilities(),
}

# Human-readable name for each backend kind. resolve_backend() uses it for
# BackendProfile.display_name and falls back to the raw kind string when a
# kind has no entry here.
_DISPLAY: dict[str, str] = {
    "vllm": "vLLM",
    "openai": "OpenAI-compatible",
    "ollama": "Ollama",
    "llamacpp": "llama.cpp",
    "tgi": "Text Generation Inference",
    "sglang": "SGLang",
    "localai": "LocalAI",
    "lmstudio": "LM Studio",
}

# Per-kind health-probe paths. Kinds without an entry use "/health"
# (see resolve_backend()). BackendProfile.health_url appends the path to the
# base URL after dropping a trailing "/v1", which is why the LM Studio entry
# spells out "/v1" itself.
_HEALTH_PATHS: dict[str, str] = {
    "ollama": "/api/tags",
    "llamacpp": "/health",
    "tgi": "/health",
    "lmstudio": "/v1/models",
}


def resolve_backend(settings: Settings) -> BackendProfile:
    """Build the ``BackendProfile`` selected by *settings*.

    ``settings.backend_kind`` is lower-cased and looked up among the known
    kinds. A missing, empty, or unrecognised value selects the generic
    ``"openai"`` profile instead of raising.

    Args:
        settings: Configuration object; this function reads its
            ``backend_kind``, ``backend_api_key`` and ``backend_base_url``
            attributes.

    Returns:
        A profile whose ``base_url`` has trailing slashes removed, whose
        health path and capabilities come from the per-kind tables, and whose
        ``extra_headers`` carry a ``Bearer`` ``Authorization`` header when an
        API key is configured (otherwise they are empty).
    """
    requested = (settings.backend_kind or "openai").lower()
    # Unknown kinds are treated as a generic OpenAI-compatible endpoint.
    kind = requested if requested in _PROFILES else "openai"

    api_key = settings.backend_api_key
    auth_headers: dict[str, str] = (
        {"Authorization": f"Bearer {api_key}"} if api_key else {}
    )

    label = _DISPLAY.get(kind, kind)
    normalised_url = settings.backend_base_url.rstrip("/")
    probe_path = _HEALTH_PATHS.get(kind, "/health")

    return BackendProfile(
        kind=kind,
        display_name=label,
        base_url=normalised_url,
        health_path=probe_path,
        capabilities=_PROFILES[kind],
        extra_headers=auth_headers,
    )