Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 25 additions & 8 deletions clients/openai_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@ var _ cogito.LLM = (*OpenAIClient)(nil)
var _ cogito.StreamingLLM = (*OpenAIClient)(nil)

// OpenAIClient wraps an *openai.Client together with the per-client settings
// that are applied to outgoing chat completion requests.
type OpenAIClient struct {
	// model is the model name supplied to the constructor.
	model string
	// client is the underlying go-openai client used to issue requests.
	client *openai.Client
	// temperature is copied from OpenAIOptions.Temperature.
	temperature float32
	// metadata is copied from OpenAIOptions.Metadata and, when non-empty, is
	// set as the request's Metadata.
	metadata map[string]string
	// reasoningEffort is copied from OpenAIOptions.ReasoningEffort and, when
	// non-empty, is set as the request's ReasoningEffort; an empty value
	// leaves the request field untouched.
	reasoningEffort string
}

// OpenAIOptions carries optional per-client settings.
Expand All @@ -26,6 +27,12 @@ type OpenAIOptions struct {
// OpenAI "metadata" object. Backends such as LocalAI use it to carry
// per-request flags, e.g. {"enable_thinking": "false"} to disable reasoning.
Metadata map[string]string
// ReasoningEffort sets the OpenAI "reasoning_effort" field on every request
// (e.g. "none"/"low"/"medium"/"high"). This is the portable, OpenAI-standard
// control for reasoning models — unlike Metadata, it binds even when the
// model's chat template has no enable_thinking toggle (e.g. LFM2.5), so it's
// the reliable way to disable thinking. Empty leaves the field unset.
ReasoningEffort string
}

func NewOpenAILLM(model, apiKey, baseURL string) *OpenAIClient {
Expand All @@ -36,10 +43,11 @@ func NewOpenAILLMWithOptions(model, apiKey, baseURL string, opts OpenAIOptions)
client := openaiClient(apiKey, baseURL)

return &OpenAIClient{
model: model,
client: client,
temperature: opts.Temperature,
metadata: opts.Metadata,
model: model,
client: client,
temperature: opts.Temperature,
metadata: opts.Metadata,
reasoningEffort: opts.ReasoningEffort,
}
}

Expand All @@ -63,6 +71,9 @@ func (llm *OpenAIClient) Ask(ctx context.Context, f cogito.Fragment) (cogito.Fra
if len(llm.metadata) > 0 {
req.Metadata = llm.metadata
}
if llm.reasoningEffort != "" {
req.ReasoningEffort = llm.reasoningEffort
}

resp, err := llm.client.CreateChatCompletion(ctx, req)

Expand Down Expand Up @@ -95,6 +106,9 @@ func (llm *OpenAIClient) CreateChatCompletion(ctx context.Context, request opena
if len(llm.metadata) > 0 {
request.Metadata = llm.metadata
}
if llm.reasoningEffort != "" {
request.ReasoningEffort = llm.reasoningEffort
}
response, err := llm.client.CreateChatCompletion(ctx, request)
if err != nil {
return cogito.LLMReply{}, cogito.LLMUsage{}, err
Expand Down Expand Up @@ -122,6 +136,9 @@ func (llm *OpenAIClient) CreateChatCompletionStream(ctx context.Context, request
if len(llm.metadata) > 0 {
request.Metadata = llm.metadata
}
if llm.reasoningEffort != "" {
request.ReasoningEffort = llm.reasoningEffort
}

stream, err := llm.client.CreateChatCompletionStream(ctx, request)
if err != nil {
Expand Down
65 changes: 65 additions & 0 deletions clients/openai_client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"io"
"net/http"
"net/http/httptest"
"strings"
"testing"

"github.com/sashabaranov/go-openai"
Expand Down Expand Up @@ -50,6 +51,70 @@ func TestCreateChatCompletionSendsMetadata(t *testing.T) {
}
}

func TestNewOpenAILLMWithOptionsSetsReasoningEffort(t *testing.T) {
llm := NewOpenAILLMWithOptions("m", "k", "http://x/v1", OpenAIOptions{
ReasoningEffort: "none",
})
if llm.reasoningEffort != "none" {
t.Fatalf("expected reasoningEffort=none, got %q", llm.reasoningEffort)
}
}

// TestCreateChatCompletionSendsReasoningEffort verifies the configured
// reasoning_effort is serialized into the outgoing request body. This is the
// portable control that disables thinking on reasoning models whose chat
// template has no enable_thinking toggle.
//
// A stub HTTP server captures the request body, decodes only the
// reasoning_effort field, and answers with a minimal chat completion so the
// client call can succeed.
func TestCreateChatCompletionSendsReasoningEffort(t *testing.T) {
	var gotEffort string
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// The handler runs off the test goroutine, so failures are reported
		// with t.Errorf rather than t.Fatalf.
		body, err := io.ReadAll(r.Body)
		if err != nil {
			t.Errorf("reading request body: %v", err)
		}
		var req struct {
			ReasoningEffort string `json:"reasoning_effort"`
		}
		if err := json.Unmarshal(body, &req); err != nil {
			t.Errorf("decoding request body %q: %v", body, err)
		}
		gotEffort = req.ReasoningEffort

		w.Header().Set("Content-Type", "application/json")
		if _, err := w.Write([]byte(`{"choices":[{"index":0,"message":{"role":"assistant","content":"ok"}}]}`)); err != nil {
			t.Errorf("writing response: %v", err)
		}
	}))
	defer srv.Close()

	llm := NewOpenAILLMWithOptions("m", "k", srv.URL+"/v1", OpenAIOptions{
		ReasoningEffort: "none",
	})
	_, _, err := llm.CreateChatCompletion(context.Background(), openai.ChatCompletionRequest{
		Messages: []openai.ChatCompletionMessage{{Role: "user", Content: "hi"}},
	})
	if err != nil {
		t.Fatalf("CreateChatCompletion: %v", err)
	}
	if gotEffort != "none" {
		t.Fatalf("request reasoning_effort = %q, want none", gotEffort)
	}
}

// TestCreateChatCompletionOmitsReasoningEffortWhenUnset proves an unset option
// sends no reasoning_effort field (so default behaviour is unchanged).
//
// The stub server records both that it was reached and whether the raw body
// mentions "reasoning_effort". The call's error and the "reached" flag are
// checked so the test cannot pass merely because no request was ever sent.
func TestCreateChatCompletionOmitsReasoningEffortWhenUnset(t *testing.T) {
	var called, present bool
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		called = true
		// The handler runs off the test goroutine, so failures are reported
		// with t.Errorf rather than t.Fatalf.
		body, err := io.ReadAll(r.Body)
		if err != nil {
			t.Errorf("reading request body: %v", err)
		}
		present = bytesContains(body, "reasoning_effort")

		w.Header().Set("Content-Type", "application/json")
		if _, err := w.Write([]byte(`{"choices":[{"index":0,"message":{"role":"assistant","content":"ok"}}]}`)); err != nil {
			t.Errorf("writing response: %v", err)
		}
	}))
	defer srv.Close()

	llm := NewOpenAILLMWithOptions("m", "k", srv.URL+"/v1", OpenAIOptions{})
	_, _, err := llm.CreateChatCompletion(context.Background(), openai.ChatCompletionRequest{
		Messages: []openai.ChatCompletionMessage{{Role: "user", Content: "hi"}},
	})
	if err != nil {
		t.Fatalf("CreateChatCompletion: %v", err)
	}
	if !called {
		t.Fatal("stub server never received the request")
	}
	if present {
		t.Fatal("reasoning_effort must be omitted when unset")
	}
}

// bytesContains reports whether the text s occurs anywhere within the raw
// bytes b, by viewing b as a string and searching it for s.
func bytesContains(b []byte, s string) bool {
	haystack := string(b)
	return strings.Index(haystack, s) >= 0
}

func TestNewOpenAILLMWithOptionsSetsTemperature(t *testing.T) {
llm := NewOpenAILLMWithOptions("m", "k", "http://localhost", OpenAIOptions{Temperature: 0.7})
if llm.temperature != 0.7 {
Expand Down
Loading