-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMakefile
More file actions
87 lines (66 loc) · 2.31 KB
/
Makefile
File metadata and controls
87 lines (66 loc) · 2.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# Every target in this file names a command, not a file it produces, so each
# one is declared phony. Without this, a stray file or directory with the same
# name (e.g. ./demo or ./test) would make the target look "up to date" and its
# recipe would be skipped.
# NOTE(review): `run` is declared here but no `run` rule exists in this file.
.PHONY: help install dev lint test run build up down logs ps health \
        env-vllm env-ollama env-llamacpp env-tgi env-sglang env-localai env-external \
        env-demo demo
# --- User-tunable settings --------------------------------------------------
# Override on the command line, e.g. `make up BACKEND=ollama`.

# Backend selection; copied into PROFILE below.
BACKEND ?= vllm
# NOTE(review): CONDA_ENV is not referenced by any rule in this file --
# presumably consumed by external tooling; confirm before removing.
CONDA_ENV ?= selfhosted-chat-api

# --- Fixed / derived values -------------------------------------------------

# Directory the install/dev/lint/test recipes change into before running.
API_DIR := api
# Passed to `docker compose --profile` (up) and used as a service name (logs).
PROFILE := $(BACKEND)
# help: print a short description of the user-facing targets.
# As the first rule in the file this is also what a bare `make` runs.
# Keep this list in sync when adding or removing targets.
help:
	@echo "Targets:"
	@echo " install Install runtime + dev deps into the current Python env"
	@echo " dev Run the gateway locally with uvicorn --reload"
	@echo " lint Ruff check"
	@echo " test Run pytest"
	@echo " build Build the API docker image"
	@echo " up BACKEND=vllm Start the stack with the chosen backend profile"
	@echo " (vllm|ollama|llamacpp|tgi|sglang|localai|none)"
	@echo " down docker compose down"
	@echo " logs Tail gateway + backend logs"
	@echo " ps Show compose status"
	@echo " health Curl /health against 127.0.0.1:8000"
	@echo " env-<backend> Copy the matching deploy/env/*.env to ./.env"
	@echo " demo Copy deploy/env/demo.env to ./.env, start the demo profile, pull a tiny CPU model"
# install: from inside $(API_DIR), install everything listed in
# requirements-dev.txt using whichever `pip` is first on PATH.
install:
	cd $(API_DIR) && \
		pip install -r requirements-dev.txt
# dev: serve app.main:app from inside $(API_DIR) with uvicorn in auto-reload
# mode, bound to 127.0.0.1:8000.
dev:
	cd $(API_DIR) && \
		uvicorn app.main:app \
			--reload \
			--host 127.0.0.1 \
			--port 8000
# lint: run `ruff check` over the app and tests directories inside $(API_DIR).
lint:
	cd $(API_DIR) && \
		ruff check app tests
# test: run pytest in quiet mode (-q) from inside $(API_DIR).
# A failing test run makes this target fail.
test:
	cd $(API_DIR) && \
		pytest -q
# build: build the image for the `api` compose service only.
build:
	docker compose \
		build api
# up: build images (--build) and start, detached (-d), the services enabled by
# the selected compose profile. Choose the backend with `make up BACKEND=<name>`.
up:
	docker compose \
		--profile $(PROFILE) \
		up -d --build
# down: run `docker compose down` for this project.
# NOTE(review): no --profile is passed here, unlike `up`; confirm that
# profile-gated backend services are also torn down as intended.
down:
	docker compose down
# logs: follow (-f) the logs of the `api` service together with the service
# named after the selected profile.
# NOTE(review): this assumes each backend's compose service name equals its
# profile name -- confirm against the compose file (notably BACKEND=none).
logs:
	docker compose \
		logs -f api $(PROFILE)
# ps: show the status of this compose project's containers.
ps:
	docker compose ps
# health: fetch /health from the locally bound gateway and pretty-print the
# JSON body. curl flags: -f fail on HTTP errors, -s no progress meter,
# -S still print errors.
health:
	curl -fsS http://127.0.0.1:8000/health \
		| python -m json.tool
# env-<name>: select a configuration by copying deploy/env/<name>.env to ./.env
# (an existing ./.env is overwritten).
#
# One rule serves every configuration. To add another, create
# deploy/env/<name>.env and append <name> to ENV_NAMES.
ENV_NAMES := vllm ollama llamacpp tgi sglang localai external demo
ENV_TARGETS := $(addprefix env-,$(ENV_NAMES))

# These are command names, not files, so declare them phony. A *static* pattern
# rule is used rather than a bare `env-%:` implicit rule because make skips the
# implicit-rule search for phony targets.
.PHONY: $(ENV_TARGETS)
$(ENV_TARGETS): env-%:
	cp deploy/env/$*.env .env
# demo: one-shot demo stack.
#   1. Prerequisite env-demo installs deploy/env/demo.env as ./.env.
#   2. Build and start the services of the `demo` compose profile, detached.
#   3. Pull a small model inside the `ollama-demo` container.
#   4. Print example requests to try.
#
# NOTE(review): assumes the compose file names the container `ollama-demo`.
# NOTE(review): `up -d` is not told to wait for readiness, so the pull may race
# the server starting inside the container -- confirm (a healthcheck plus
# `up --wait` would close the gap).
demo: env-demo
	docker compose --profile demo up -d --build
	@echo ""
	@echo "Pulling a tiny model (runs on CPU):"
	@# Request an interactive TTY only when stdin is a terminal: an unconditional
	@# `-it` aborts with "the input device is not a TTY" under CI or when piped.
	@# `$$` passes a literal `$` through make to the shell.
	@docker exec $$(test -t 0 && echo -it) ollama-demo ollama pull qwen2.5:0.5b-instruct
	@echo ""
	@echo "Ready. Try:"
	@echo " curl http://127.0.0.1:8000/health"
	@echo " curl http://127.0.0.1:8000/v1/chat/completions \\"
	@echo " -H 'Authorization: Bearer demo-key' -H 'Content-Type: application/json' \\"
	@echo " -d '{\"model\":\"qwen2.5:0.5b-instruct\",\"messages\":[{\"role\":\"user\",\"content\":\"hi\"}]}'"