-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathBACKEND_MATRIX.json
More file actions
101 lines (101 loc) · 2.61 KB
/
BACKEND_MATRIX.json
File metadata and controls
101 lines (101 loc) · 2.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
{
"version": "1.0.0",
"defaultBackend": "auto",
"autoFallbackOrder": [
"lmstudio",
"ollama",
"ugrad",
"huggingface",
"mistral"
],
"backends": {
"lmstudio": {
"protocol": "openai_compatible",
"baseUrlEnv": "LOCAL_LLM_BASE_URL",
"modelEnv": "LOCAL_LLM_MODEL",
"apiKeyEnv": "LOCAL_LLM_API_KEY",
"defaultBaseUrl": "http://127.0.0.1:1234/v1",
"notes": "Use for local LM Studio model serving."
},
"ollama": {
"protocol": "ollama_api",
"baseUrlEnv": "OLLAMA_BASE_URL",
"modelEnv": "OLLAMA_MODEL",
"defaultBaseUrl": "http://127.0.0.1:11434",
"notes": "Use for local Ollama models."
},
"ugrad": {
"protocol": "openai_compatible",
"baseUrlEnv": "UGRAD_MODELLAB_BASE_URL",
"modelEnv": "UGRAD_MODELLAB_MODEL",
"apiKeyEnv": "UGRAD_MODELLAB_API_KEY",
"notes": "Use for the ugrad Model Lab gateway."
},
"huggingface": {
"protocol": "hf_inference_api",
"tokenEnv": "HF_API_TOKEN",
"modelEnv": "HF_MODEL",
"notes": "Hosted inference via Hugging Face API."
},
"mistral": {
"protocol": "mistral_sdk",
"apiKeyEnv": "MISTRAL_API_KEY",
"modelEnv": "MISTRAL_MODEL",
"notes": "Hosted inference via Mistral SDK."
}
},
"modelFamilyAliases": {
"llama2": {
"ollama": "llama2",
"huggingface": "meta-llama/Llama-2-7b-chat-hf",
"lmstudio": "llama-2",
"ugrad": "llama-2"
},
"llama3": {
"ollama": "llama3.1",
"huggingface": "meta-llama/Llama-3.1-8B-Instruct",
"lmstudio": "llama-3",
"ugrad": "llama-3"
},
"mistral": {
"ollama": "mistral",
"huggingface": "mistralai/Mistral-7B-Instruct-v0.3",
"lmstudio": "mistral",
"ugrad": "mistral",
"mistral": "mistral-small-latest"
},
"falcon": {
"ollama": "falcon",
"huggingface": "tiiuae/falcon-7b-instruct",
"lmstudio": "falcon",
"ugrad": "falcon"
},
"gptneo": {
"ollama": "gpt-neo",
"huggingface": "EleutherAI/gpt-neo-2.7B",
"lmstudio": "gpt-neo",
"ugrad": "gpt-neo"
},
"codellama": {
"ollama": "codellama",
"huggingface": "meta-llama/CodeLlama-7b-Instruct-hf",
"lmstudio": "codellama",
"ugrad": "codellama"
},
"vicuna": {
"ollama": "vicuna",
"huggingface": "lmsys/vicuna-7b-v1.5",
"lmstudio": "vicuna",
"ugrad": "vicuna"
}
},
"security": {
"neverCommitSecrets": true,
"requiredSecretEnvVars": [
"MISTRAL_API_KEY",
"HF_API_TOKEN",
"UGRAD_MODELLAB_API_KEY",
"LOCAL_LLM_API_KEY"
]
}
}