Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,12 @@ All fields and their defaults:
| Field | Type | Default | Description |
|---|---|---|---|
| `provider` | `"gemini" \| "openai" \| "anthropic" \| "openrouter" \| "local"` | *(required)* | Provider to use |
| `model` | `str` | *(required)* | Model identifier |
| `model` | `str \| None` | `None` | Model identifier. Required for cloud providers; optional for single-model local servers |
| `api_key` | `str \| None` | `None` | Explicit key; auto-resolved from env if omitted. Optional for `provider="local"` |
| `base_url` | `str \| None` | `None` | Required for `provider="local"`; rejected for cloud providers. Falls back to `POLLUX_LOCAL_BASE_URL` |
| `use_mock` | `bool` | `False` | Use mock provider (no network calls) |
| `request_concurrency` | `int` | `6` | Max concurrent API calls in multi-prompt execution |
| `request_timeout_s` | `float` | `300.0` | HTTP request timeout in seconds for providers that own their transport; currently applied only by `provider="local"` |
| `retry` | `RetryPolicy` | `RetryPolicy()` | Retry configuration |

## API Key Resolution
Expand Down Expand Up @@ -67,13 +68,16 @@ project root for local development, but never commit it.
## Self-Hosted Models (`provider="local"`)

Pollux supports self-hosted servers that speak the OpenAI Chat Completions wire
format. Point `base_url` at the server; `api_key` is optional.
format. Point `base_url` at the server; `api_key` is optional. If the server
has a single configured model or rejects client-supplied model names, omit
`model` and Pollux will leave the `model` request field out of local payloads.

```python
config = Config(
provider="local",
model="gemma3:4b",
base_url="http://localhost:11434/v1",
request_timeout_s=600,
)
```

Expand Down
22 changes: 21 additions & 1 deletion src/pollux/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -668,9 +668,21 @@ def _build_openrouter(api_key: str | None, _base_url: str | None) -> Provider:


def _build_local(api_key: str | None, base_url: str | None) -> Provider:
    """Build the local provider using a fixed 300-second request timeout.

    Thin adapter with the two-argument ``(api_key, base_url)`` shape; it
    delegates to ``_build_local_with_timeout`` with a constant timeout.
    """
    default_timeout_s = 300.0
    return _build_local_with_timeout(api_key, base_url, default_timeout_s)


def _build_local_with_timeout(
    api_key: str | None,
    base_url: str | None,
    request_timeout_s: float,
) -> Provider:
    """Build the local OpenAI-compatible provider with an explicit timeout.

    Args:
        api_key: Optional key, forwarded to ``LocalProvider`` unchanged.
        base_url: Server URL. Typed as optional to match the other builders,
            but cast to ``str`` before use (presumably already resolved and
            validated by ``Config`` -- confirm against the caller).
        request_timeout_s: HTTP request timeout in seconds, passed to the
            provider as ``timeout_s``.

    Returns:
        A ``LocalProvider`` configured with the given URL, key, and timeout.
    """
    # Function-scope import, as in the original block.
    from pollux.providers.local import LocalProvider

    # Single return: the earlier timeout-less ``LocalProvider(...)`` return
    # made this call unreachable and silently dropped ``request_timeout_s``.
    return LocalProvider(
        base_url=cast("str", base_url),
        api_key=api_key,
        timeout_s=request_timeout_s,
    )


# Single source of truth for provider construction and lifecycle traits.
Expand All @@ -692,6 +704,7 @@ def _create_provider(
*,
use_mock: bool = False,
base_url: str | None = None,
request_timeout_s: float = 300.0,
) -> Provider:
"""Instantiate a provider client from explicit parameters."""
if use_mock:
Expand Down Expand Up @@ -724,6 +737,12 @@ def _create_provider(
hint=f"Set {env_var} or pass Config(api_key=...).",
)

if provider == "local":
return _build_local_with_timeout(
api_key,
base_url,
request_timeout_s,
)
return spec.build(api_key, base_url)


Expand All @@ -734,6 +753,7 @@ def _get_provider(config: Config) -> Provider:
config.api_key,
use_mock=config.use_mock,
base_url=config.base_url,
request_timeout_s=config.request_timeout_s,
)


Expand Down
15 changes: 11 additions & 4 deletions src/pollux/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,8 +298,15 @@ async def create_cache_impl(
hint="Ensure all items in the tools list are dictionaries.",
)

if config.model is None:
raise ConfigurationError(
"create_cache() requires a configured model",
hint="Pass Config(model=...) when preparing persistent provider caches.",
)
model = config.model

key = compute_cache_key(
config.model,
model,
src_tuple,
provider=config.provider,
api_key=config.api_key,
Expand All @@ -312,7 +319,7 @@ async def create_cache_impl(
cache_name, expires_at = cached
return CacheHandle(
name=cache_name,
model=config.model,
model=model,
provider=config.provider,
expires_at=expires_at,
)
Expand All @@ -323,7 +330,7 @@ async def create_cache_impl(
provider,
_registry,
key=key,
model=config.model,
model=model,
raw_parts=raw_parts,
system_instruction=system_instruction,
tools=tools,
Expand All @@ -341,7 +348,7 @@ async def create_cache_impl(

return CacheHandle(
name=cache_name,
model=config.model,
model=model,
provider=config.provider,
expires_at=expires_at,
)
25 changes: 23 additions & 2 deletions src/pollux/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ def _resolve_local_base_url() -> str | None:
class Config:
"""Immutable configuration for Pollux execution.

Provider and model are required—Pollux does not guess what you want.
``provider`` is always required, and ``model`` is required for cloud
providers—Pollux does not guess what you want. Local OpenAI-compatible servers
may omit ``model`` when the server has a single configured model or otherwise
does not require it.
API keys are auto-resolved from standard environment variables.

Example:
Expand All @@ -67,7 +69,7 @@ class Config:
"""

provider: ProviderName
model: str
model: str | None = None
#: Auto-resolved from the provider-specific API key env var when *None*.
#: Optional for ``provider="local"``.
api_key: str | None = None
Expand All @@ -76,6 +78,9 @@ class Config:
base_url: str | None = None
use_mock: bool = False
request_concurrency: int = 6
#: HTTP request timeout in seconds for providers that own their transport.
#: Currently applied by the local OpenAI-compatible provider.
request_timeout_s: float = 300.0
retry: RetryPolicy = field(default_factory=RetryPolicy)
#: Optional capability declarations that override the provider's static
#: capabilities for this config (v2 interaction path). A declared capability
Expand Down Expand Up @@ -106,6 +111,16 @@ def __post_init__(self) -> None:
f"request_concurrency must be ≥ 1, got {self.request_concurrency}",
hint="This controls how many API calls run in parallel.",
)
if not isinstance(self.request_timeout_s, int | float):
raise ConfigurationError(
f"request_timeout_s must be numeric, got {type(self.request_timeout_s).__name__}",
hint="Pass a timeout in seconds, for example request_timeout_s=600.",
)
if self.request_timeout_s <= 0:
raise ConfigurationError(
f"request_timeout_s must be > 0, got {self.request_timeout_s}",
hint="Pass a positive timeout in seconds.",
)

if self.provider == "local":
# Local: resolve base_url for real calls, skip API-key resolution entirely.
Expand All @@ -123,6 +138,12 @@ def __post_init__(self) -> None:
)
return

if not isinstance(self.model, str) or not self.model:
raise ConfigurationError(
f"model required for provider={self.provider!r}",
hint="Pass the provider model name, for example model='gpt-5-nano'.",
)

# Cloud providers: base_url is not a meaningful override here.
if self.base_url is not None:
raise ConfigurationError(
Expand Down
5 changes: 5 additions & 0 deletions src/pollux/deferred.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,11 @@ async def submit_deferred(
if provider_handle.submitted_at is not None
else time.time()
)
if config.model is None:
raise ConfigurationError(
"defer() requires a configured model",
hint="Pass Config(model=...) for provider-side deferred jobs.",
)
return DeferredHandle(
job_id=provider_handle.job_id,
provider=config.provider,
Expand Down
9 changes: 5 additions & 4 deletions src/pollux/providers/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import inspect
import json
import logging
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, cast

from pollux.errors import APIError, ConfigurationError
from pollux.interaction.tools import ToolCallDelta
Expand Down Expand Up @@ -255,7 +255,7 @@ def _build_messages_create_kwargs(
config: Config,
) -> dict[str, Any]:
"""Build the raw Anthropic Messages API request body."""
model = config.model
model = cast("str", config.model)
history, _previous_response_id, provider_state = _compile.prior_turns(input)
messages = self._build_messages(parts, history or None, provider_state)
system_instruction = _compile.system_instruction(snapshot)
Expand Down Expand Up @@ -556,9 +556,10 @@ async def validate_request(
requirements.reasoning_effort is not None
or requirements.reasoning_budget_tokens is not None
)
if wants_reasoning and _model_lacks_extended_thinking(config.model):
model = cast("str", config.model)
if wants_reasoning and _model_lacks_extended_thinking(model):
raise ConfigurationError(
f"Model {config.model!r} does not support extended thinking",
f"Model {model!r} does not support extended thinking",
hint=(
"Remove reasoning_effort/reasoning_budget_tokens, or use a "
"model with extended thinking (Claude 3.7 or 4.x)."
Expand Down
Loading
Loading