Skip to content

Commit f858922

Browse files
committed
Add transient upstream cooldown config
1 parent 1fca942 commit f858922

8 files changed

Lines changed: 156 additions & 52 deletions

File tree

config.example.yaml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -102,6 +102,10 @@ max-retry-interval: 30
102102
# When true, disable auth/model cooldown scheduling globally (prevents blackout windows after failure states).
103103
disable-cooling: false
104104

105+
# Auth/model cooldown in seconds after transient upstream errors (408/500/502/503/504).
106+
# Set to 0 to disable only this transient-error cooldown while keeping auth/quota cooldowns.
107+
transient-error-cooldown-seconds: 60
108+
105109
# disable-image-generation supports: false (default), true, or "chat".
106110
# - true: disable image_generation everywhere (also returns 404 for /v1/images/generations and /v1/images/edits).
107111
# - "chat": disable image_generation injection on non-images endpoints, but keep /v1/images/generations and /v1/images/edits enabled.

internal/api/server.go

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -276,6 +276,7 @@ func NewServer(cfg *config.Config, authManager *auth.Manager, accessManager *sdk
276276
}
277277
managementasset.SetCurrentConfig(cfg)
278278
auth.SetQuotaCooldownDisabled(cfg.DisableCooling)
279+
auth.SetTransientErrorCooldown(time.Duration(cfg.TransientErrorCooldownSeconds) * time.Second)
279280
applySignatureCacheConfig(nil, cfg)
280281
// Initialize management handler
281282
s.mgmt = managementHandlers.NewHandler(cfg, configFilePath, authManager)
@@ -1228,6 +1229,9 @@ func (s *Server) UpdateClients(cfg *config.Config) {
12281229
if oldCfg == nil || oldCfg.DisableCooling != cfg.DisableCooling {
12291230
auth.SetQuotaCooldownDisabled(cfg.DisableCooling)
12301231
}
1232+
if oldCfg == nil || oldCfg.TransientErrorCooldownSeconds != cfg.TransientErrorCooldownSeconds {
1233+
auth.SetTransientErrorCooldown(time.Duration(cfg.TransientErrorCooldownSeconds) * time.Second)
1234+
}
12311235

12321236
if oldCfg != nil && oldCfg.DisableImageGeneration != cfg.DisableImageGeneration {
12331237
log.Infof("disable-image-generation updated: %v -> %v", oldCfg.DisableImageGeneration, cfg.DisableImageGeneration)

internal/config/config.go

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -77,6 +77,10 @@ type Config struct {
7777
// DisableCooling disables quota cooldown scheduling when true.
7878
DisableCooling bool `yaml:"disable-cooling" json:"disable-cooling"`
7979

80+
// TransientErrorCooldownSeconds controls auth/model cooldown after transient upstream failures.
81+
// Applies to 408/500/502/503/504 responses. Set to 0 to disable only this cooldown.
82+
TransientErrorCooldownSeconds int `yaml:"transient-error-cooldown-seconds" json:"transient-error-cooldown-seconds"`
83+
8084
// AuthAutoRefreshWorkers overrides the size of the core auth auto-refresh worker pool.
8185
// When <= 0, the default worker count is used.
8286
AuthAutoRefreshWorkers int `yaml:"auth-auto-refresh-workers" json:"auth-auto-refresh-workers"`
@@ -626,6 +630,7 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
626630
cfg.UsageStatisticsEnabled = false
627631
cfg.RedisUsageQueueRetentionSeconds = 60
628632
cfg.DisableCooling = false
633+
cfg.TransientErrorCooldownSeconds = 60
629634
cfg.DisableImageGeneration = DisableImageGenerationOff
630635
cfg.Pprof.Enable = false
631636
cfg.Pprof.Addr = DefaultPprofAddr
@@ -693,6 +698,10 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) {
693698
log.WithField("value", cfg.RedisUsageQueueRetentionSeconds).Warn("redis-usage-queue-retention-seconds too large; clamping to 3600")
694699
cfg.RedisUsageQueueRetentionSeconds = 3600
695700
}
701+
if cfg.TransientErrorCooldownSeconds < 0 {
702+
log.WithField("value", cfg.TransientErrorCooldownSeconds).Warn("transient-error-cooldown-seconds cannot be negative; clamping to 0")
703+
cfg.TransientErrorCooldownSeconds = 0
704+
}
696705

697706
if cfg.MaxRetryCredentials < 0 {
698707
cfg.MaxRetryCredentials = 0

internal/config/parse.go

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ func ParseConfigBytes(data []byte) (*Config, error) {
2525
cfg.UsageStatisticsEnabled = false
2626
cfg.RedisUsageQueueRetentionSeconds = 60
2727
cfg.DisableCooling = false
28+
cfg.TransientErrorCooldownSeconds = 60
2829
cfg.DisableImageGeneration = DisableImageGenerationOff
2930
cfg.Pprof.Enable = false
3031
cfg.Pprof.Addr = DefaultPprofAddr
@@ -68,6 +69,10 @@ func ParseConfigBytes(data []byte) (*Config, error) {
6869
log.WithField("value", cfg.RedisUsageQueueRetentionSeconds).Warn("redis-usage-queue-retention-seconds too large; clamping to 3600")
6970
cfg.RedisUsageQueueRetentionSeconds = 3600
7071
}
72+
if cfg.TransientErrorCooldownSeconds < 0 {
73+
log.WithField("value", cfg.TransientErrorCooldownSeconds).Warn("transient-error-cooldown-seconds cannot be negative; clamping to 0")
74+
cfg.TransientErrorCooldownSeconds = 0
75+
}
7176

7277
if cfg.MaxRetryCredentials < 0 {
7378
cfg.MaxRetryCredentials = 0

internal/watcher/diff/config_diff.go

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,9 @@ func BuildConfigChangeDetails(oldCfg, newCfg *config.Config) []string {
4545
if oldCfg.DisableCooling != newCfg.DisableCooling {
4646
changes = append(changes, fmt.Sprintf("disable-cooling: %t -> %t", oldCfg.DisableCooling, newCfg.DisableCooling))
4747
}
48+
if oldCfg.TransientErrorCooldownSeconds != newCfg.TransientErrorCooldownSeconds {
49+
changes = append(changes, fmt.Sprintf("transient-error-cooldown-seconds: %d -> %d", oldCfg.TransientErrorCooldownSeconds, newCfg.TransientErrorCooldownSeconds))
50+
}
4851
if oldCfg.DisableImageGeneration != newCfg.DisableImageGeneration {
4952
changes = append(changes, fmt.Sprintf("disable-image-generation: %v -> %v", oldCfg.DisableImageGeneration, newCfg.DisableImageGeneration))
5053
}

internal/watcher/diff/config_diff_test.go

Lines changed: 54 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -219,21 +219,22 @@ func TestBuildConfigChangeDetails_SecretsAndCounts(t *testing.T) {
219219

220220
func TestBuildConfigChangeDetails_FlagsAndKeys(t *testing.T) {
221221
oldCfg := &config.Config{
222-
Port: 1000,
223-
AuthDir: "/old",
224-
Debug: false,
225-
LoggingToFile: false,
226-
UsageStatisticsEnabled: false,
227-
DisableCooling: false,
228-
RequestRetry: 1,
229-
MaxRetryCredentials: 1,
230-
MaxRetryInterval: 1,
231-
WebsocketAuth: false,
232-
QuotaExceeded: config.QuotaExceeded{SwitchProject: false, SwitchPreviewModel: false, AntigravityCredits: false},
233-
ClaudeKey: []config.ClaudeKey{{APIKey: "c1"}},
234-
CodexKey: []config.CodexKey{{APIKey: "x1"}},
235-
AmpCode: config.AmpCode{UpstreamAPIKey: "keep", RestrictManagementToLocalhost: false},
236-
RemoteManagement: config.RemoteManagement{DisableControlPanel: false, PanelGitHubRepository: "old/repo", SecretKey: "keep"},
222+
Port: 1000,
223+
AuthDir: "/old",
224+
Debug: false,
225+
LoggingToFile: false,
226+
UsageStatisticsEnabled: false,
227+
DisableCooling: false,
228+
TransientErrorCooldownSeconds: 60,
229+
RequestRetry: 1,
230+
MaxRetryCredentials: 1,
231+
MaxRetryInterval: 1,
232+
WebsocketAuth: false,
233+
QuotaExceeded: config.QuotaExceeded{SwitchProject: false, SwitchPreviewModel: false, AntigravityCredits: false},
234+
ClaudeKey: []config.ClaudeKey{{APIKey: "c1"}},
235+
CodexKey: []config.CodexKey{{APIKey: "x1"}},
236+
AmpCode: config.AmpCode{UpstreamAPIKey: "keep", RestrictManagementToLocalhost: false},
237+
RemoteManagement: config.RemoteManagement{DisableControlPanel: false, PanelGitHubRepository: "old/repo", SecretKey: "keep"},
237238
SDKConfig: sdkconfig.SDKConfig{
238239
RequestLog: false,
239240
ProxyURL: "http://old-proxy",
@@ -243,17 +244,18 @@ func TestBuildConfigChangeDetails_FlagsAndKeys(t *testing.T) {
243244
},
244245
}
245246
newCfg := &config.Config{
246-
Port: 2000,
247-
AuthDir: "/new",
248-
Debug: true,
249-
LoggingToFile: true,
250-
UsageStatisticsEnabled: true,
251-
DisableCooling: true,
252-
RequestRetry: 2,
253-
MaxRetryCredentials: 3,
254-
MaxRetryInterval: 3,
255-
WebsocketAuth: true,
256-
QuotaExceeded: config.QuotaExceeded{SwitchProject: true, SwitchPreviewModel: true, AntigravityCredits: true},
247+
Port: 2000,
248+
AuthDir: "/new",
249+
Debug: true,
250+
LoggingToFile: true,
251+
UsageStatisticsEnabled: true,
252+
DisableCooling: true,
253+
TransientErrorCooldownSeconds: 0,
254+
RequestRetry: 2,
255+
MaxRetryCredentials: 3,
256+
MaxRetryInterval: 3,
257+
WebsocketAuth: true,
258+
QuotaExceeded: config.QuotaExceeded{SwitchProject: true, SwitchPreviewModel: true, AntigravityCredits: true},
257259
ClaudeKey: []config.ClaudeKey{
258260
{APIKey: "c1", BaseURL: "http://new", ProxyURL: "http://p", Headers: map[string]string{"H": "1"}, ExcludedModels: []string{"a"}},
259261
{APIKey: "c2"},
@@ -288,6 +290,7 @@ func TestBuildConfigChangeDetails_FlagsAndKeys(t *testing.T) {
288290
expectContains(t, details, "logging-to-file: false -> true")
289291
expectContains(t, details, "usage-statistics-enabled: false -> true")
290292
expectContains(t, details, "disable-cooling: false -> true")
293+
expectContains(t, details, "transient-error-cooldown-seconds: 60 -> 0")
291294
expectContains(t, details, "disable-image-generation: false -> true")
292295
expectContains(t, details, "request-log: false -> true")
293296
expectContains(t, details, "request-retry: 1 -> 2")
@@ -313,17 +316,18 @@ func TestBuildConfigChangeDetails_FlagsAndKeys(t *testing.T) {
313316

314317
func TestBuildConfigChangeDetails_AllBranches(t *testing.T) {
315318
oldCfg := &config.Config{
316-
Port: 1,
317-
AuthDir: "/a",
318-
Debug: false,
319-
LoggingToFile: false,
320-
UsageStatisticsEnabled: false,
321-
DisableCooling: false,
322-
RequestRetry: 1,
323-
MaxRetryCredentials: 1,
324-
MaxRetryInterval: 1,
325-
WebsocketAuth: false,
326-
QuotaExceeded: config.QuotaExceeded{SwitchProject: false, SwitchPreviewModel: false, AntigravityCredits: false},
319+
Port: 1,
320+
AuthDir: "/a",
321+
Debug: false,
322+
LoggingToFile: false,
323+
UsageStatisticsEnabled: false,
324+
DisableCooling: false,
325+
TransientErrorCooldownSeconds: 60,
326+
RequestRetry: 1,
327+
MaxRetryCredentials: 1,
328+
MaxRetryInterval: 1,
329+
WebsocketAuth: false,
330+
QuotaExceeded: config.QuotaExceeded{SwitchProject: false, SwitchPreviewModel: false, AntigravityCredits: false},
327331
GeminiKey: []config.GeminiKey{
328332
{APIKey: "g-old", BaseURL: "http://g-old", ProxyURL: "http://gp-old", Headers: map[string]string{"A": "1"}},
329333
},
@@ -367,17 +371,18 @@ func TestBuildConfigChangeDetails_AllBranches(t *testing.T) {
367371
},
368372
}
369373
newCfg := &config.Config{
370-
Port: 2,
371-
AuthDir: "/b",
372-
Debug: true,
373-
LoggingToFile: true,
374-
UsageStatisticsEnabled: true,
375-
DisableCooling: true,
376-
RequestRetry: 2,
377-
MaxRetryCredentials: 3,
378-
MaxRetryInterval: 3,
379-
WebsocketAuth: true,
380-
QuotaExceeded: config.QuotaExceeded{SwitchProject: true, SwitchPreviewModel: true, AntigravityCredits: true},
374+
Port: 2,
375+
AuthDir: "/b",
376+
Debug: true,
377+
LoggingToFile: true,
378+
UsageStatisticsEnabled: true,
379+
DisableCooling: true,
380+
TransientErrorCooldownSeconds: 0,
381+
RequestRetry: 2,
382+
MaxRetryCredentials: 3,
383+
MaxRetryInterval: 3,
384+
WebsocketAuth: true,
385+
QuotaExceeded: config.QuotaExceeded{SwitchProject: true, SwitchPreviewModel: true, AntigravityCredits: true},
381386
GeminiKey: []config.GeminiKey{
382387
{APIKey: "g-new", BaseURL: "http://g-new", ProxyURL: "http://gp-new", Headers: map[string]string{"A": "2"}, ExcludedModels: []string{"x", "y"}},
383388
},
@@ -434,6 +439,7 @@ func TestBuildConfigChangeDetails_AllBranches(t *testing.T) {
434439
expectContains(t, changes, "logging-to-file: false -> true")
435440
expectContains(t, changes, "usage-statistics-enabled: false -> true")
436441
expectContains(t, changes, "disable-cooling: false -> true")
442+
expectContains(t, changes, "transient-error-cooldown-seconds: 60 -> 0")
437443
expectContains(t, changes, "disable-image-generation: false -> true")
438444
expectContains(t, changes, "request-retry: 1 -> 2")
439445
expectContains(t, changes, "max-retry-credentials: 1 -> 3")

sdk/cliproxy/auth/conductor.go

Lines changed: 32 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -76,13 +76,37 @@ const (
7676
quotaBackoffMax = 30 * time.Minute
7777
)
7878

79-
var quotaCooldownDisabled atomic.Bool
79+
var (
80+
quotaCooldownDisabled atomic.Bool
81+
transientErrorCooldownSeconds atomic.Int64
82+
)
83+
84+
func init() {
85+
transientErrorCooldownSeconds.Store(int64((1 * time.Minute) / time.Second))
86+
}
8087

8188
// SetQuotaCooldownDisabled toggles quota cooldown scheduling globally.
8289
func SetQuotaCooldownDisabled(disable bool) {
8390
quotaCooldownDisabled.Store(disable)
8491
}
8592

93+
// SetTransientErrorCooldown updates the cooldown used for transient upstream errors.
94+
// Set duration to 0 to keep auths immediately reusable after 408/500/502/503/504.
95+
func SetTransientErrorCooldown(duration time.Duration) {
96+
if duration < 0 {
97+
duration = 0
98+
}
99+
transientErrorCooldownSeconds.Store(int64(duration / time.Second))
100+
}
101+
102+
func transientErrorCooldown() time.Duration {
103+
seconds := transientErrorCooldownSeconds.Load()
104+
if seconds <= 0 {
105+
return 0
106+
}
107+
return time.Duration(seconds) * time.Second
108+
}
109+
86110
func quotaCooldownDisabledForAuth(auth *Auth) bool {
87111
if auth != nil {
88112
if override, ok := auth.DisableCoolingOverride(); ok {
@@ -2252,9 +2276,11 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
22522276
case 408, 500, 502, 503, 504:
22532277
if disableCooling {
22542278
state.NextRetryAfter = time.Time{}
2255-
} else {
2256-
next := now.Add(1 * time.Minute)
2279+
} else if cooldown := transientErrorCooldown(); cooldown > 0 {
2280+
next := now.Add(cooldown)
22572281
state.NextRetryAfter = next
2282+
} else {
2283+
state.NextRetryAfter = time.Time{}
22582284
}
22592285
default:
22602286
state.NextRetryAfter = time.Time{}
@@ -2671,8 +2697,10 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Durati
26712697
auth.StatusMessage = "transient upstream error"
26722698
if disableCooling {
26732699
auth.NextRetryAfter = time.Time{}
2700+
} else if cooldown := transientErrorCooldown(); cooldown > 0 {
2701+
auth.NextRetryAfter = now.Add(cooldown)
26742702
} else {
2675-
auth.NextRetryAfter = now.Add(1 * time.Minute)
2703+
auth.NextRetryAfter = time.Time{}
26762704
}
26772705
default:
26782706
if auth.StatusMessage == "" {

sdk/cliproxy/auth/conductor_overrides_test.go

Lines changed: 45 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -522,6 +522,51 @@ func TestManager_MarkResult_RespectsAuthDisableCoolingOverride(t *testing.T) {
522522
}
523523
}
524524

525+
func TestManager_MarkResult_TransientErrorCooldownCanBeDisabled(t *testing.T) {
526+
prevCooling := quotaCooldownDisabled.Load()
527+
quotaCooldownDisabled.Store(false)
528+
prevTransient := transientErrorCooldownSeconds.Load()
529+
SetTransientErrorCooldown(0)
530+
t.Cleanup(func() {
531+
quotaCooldownDisabled.Store(prevCooling)
532+
transientErrorCooldownSeconds.Store(prevTransient)
533+
})
534+
535+
m := NewManager(nil, nil, nil)
536+
537+
auth := &Auth{
538+
ID: "auth-transient",
539+
Provider: "claude",
540+
}
541+
if _, errRegister := m.Register(context.Background(), auth); errRegister != nil {
542+
t.Fatalf("register auth: %v", errRegister)
543+
}
544+
545+
model := "test-model-transient"
546+
m.MarkResult(context.Background(), Result{
547+
AuthID: auth.ID,
548+
Provider: "claude",
549+
Model: model,
550+
Success: false,
551+
Error: &Error{HTTPStatus: http.StatusServiceUnavailable, Message: "upstream unavailable"},
552+
})
553+
554+
updated, ok := m.GetByID(auth.ID)
555+
if !ok || updated == nil {
556+
t.Fatalf("expected auth to be present")
557+
}
558+
state := updated.ModelStates[model]
559+
if state == nil {
560+
t.Fatalf("expected model state to be present")
561+
}
562+
if !state.Unavailable {
563+
t.Fatalf("expected model state to record the transient failure")
564+
}
565+
if !state.NextRetryAfter.IsZero() {
566+
t.Fatalf("expected transient NextRetryAfter to be zero, got %v", state.NextRetryAfter)
567+
}
568+
}
569+
525570
func TestManager_MarkResult_RespectsAuthDisableCoolingOverride_On403(t *testing.T) {
526571
prev := quotaCooldownDisabled.Load()
527572
quotaCooldownDisabled.Store(false)

0 commit comments

Comments
 (0)