seanbrar · seanbrar · Jun 17, 2026 · Jun 17, 2026
diff --git a/docs/configuration.md b/docs/configuration.md
@@ -32,11 +32,12 @@ All fields and their defaults:
 | Field | Type | Default | Description |
 |---|---|---|---|
 | `provider` | `"gemini" \| "openai" \| "anthropic" \| "openrouter" \| "local"` | *(required)* | Provider to use |
-| `model` | `str` | *(required)* | Model identifier |
+| `model` | `str \| None` | `None` | Model identifier. Required for cloud providers; optional for single-model local servers |
 | `api_key` | `str \| None` | `None` | Explicit key; auto-resolved from env if omitted. Optional for `provider="local"` |
 | `base_url` | `str \| None` | `None` | Required for `provider="local"`; rejected for cloud providers. Falls back to `POLLUX_LOCAL_BASE_URL` |
 | `use_mock` | `bool` | `False` | Use mock provider (no network calls) |
 | `request_concurrency` | `int` | `6` | Max concurrent API calls in multi-prompt execution |
+| `request_timeout_s` | `float` | `300.0` | HTTP request timeout in seconds for providers that own their transport. Currently applied only by `provider="local"` |
 | `retry` | `RetryPolicy` | `RetryPolicy()` | Retry configuration |
 
 ## API Key Resolution
@@ -67,13 +68,16 @@ project root for local development, but never commit it.
 ## Self-Hosted Models (`provider="local"`)
 
 Pollux supports self-hosted servers that speak the OpenAI Chat Completions wire
-format. Point `base_url` at the server; `api_key` is optional.
+format. Point `base_url` at the server; `api_key` is optional. If the server
+has a single configured model or rejects client-supplied model names, omit
+`model` and Pollux will leave the `model` request field out of local payloads.
 
 ```python
 config = Config(
     provider="local",
     model="gemma3:4b",
     base_url="http://localhost:11434/v1",
+    request_timeout_s=600,
 )
 ```
 

diff --git a/src/pollux/__init__.py b/src/pollux/__init__.py
@@ -668,9 +668,21 @@ def _build_openrouter(api_key: str | None, _base_url: str | None) -> Provider:
 
 
 def _build_local(api_key: str | None, base_url: str | None) -> Provider:
+    return _build_local_with_timeout(api_key, base_url, 300.0)
+
+
+def _build_local_with_timeout(
+    api_key: str | None,
+    base_url: str | None,
+    request_timeout_s: float,
+) -> Provider:
     from pollux.providers.local import LocalProvider
 
-    return LocalProvider(base_url=cast("str", base_url), api_key=api_key)
+    return LocalProvider(
+        base_url=cast("str", base_url),
+        api_key=api_key,
+        timeout_s=request_timeout_s,
+    )
 
 
 # Single source of truth for provider construction and lifecycle traits.
@@ -692,6 +704,7 @@ def _create_provider(
     *,
     use_mock: bool = False,
     base_url: str | None = None,
+    request_timeout_s: float = 300.0,
 ) -> Provider:
     """Instantiate a provider client from explicit parameters."""
     if use_mock:
@@ -724,6 +737,12 @@ def _create_provider(
             hint=f"Set {env_var} or pass Config(api_key=...).",
         )
 
+    if provider == "local":
+        return _build_local_with_timeout(
+            api_key,
+            base_url,
+            request_timeout_s,
+        )
     return spec.build(api_key, base_url)
 
 
@@ -734,6 +753,7 @@ def _get_provider(config: Config) -> Provider:
         config.api_key,
         use_mock=config.use_mock,
         base_url=config.base_url,
+        request_timeout_s=config.request_timeout_s,
     )
 
 

diff --git a/src/pollux/cache.py b/src/pollux/cache.py
@@ -298,8 +298,15 @@ async def create_cache_impl(
                     hint="Ensure all items in the tools list are dictionaries.",
                 )
 
+    if config.model is None:
+        raise ConfigurationError(
+            "create_cache() requires a configured model",
+            hint="Pass Config(model=...) when preparing persistent provider caches.",
+        )
+    model = config.model
+
     key = compute_cache_key(
-        config.model,
+        model,
         src_tuple,
         provider=config.provider,
         api_key=config.api_key,
@@ -312,7 +319,7 @@ async def create_cache_impl(
         cache_name, expires_at = cached
         return CacheHandle(
             name=cache_name,
-            model=config.model,
+            model=model,
             provider=config.provider,
             expires_at=expires_at,
         )
@@ -323,7 +330,7 @@ async def create_cache_impl(
         provider,
         _registry,
         key=key,
-        model=config.model,
+        model=model,
         raw_parts=raw_parts,
         system_instruction=system_instruction,
         tools=tools,
@@ -341,7 +348,7 @@ async def create_cache_impl(
 
     return CacheHandle(
         name=cache_name,
-        model=config.model,
+        model=model,
         provider=config.provider,
         expires_at=expires_at,
     )
diff --git a/src/pollux/config.py b/src/pollux/config.py
@@ -55,7 +55,9 @@ def _resolve_local_base_url() -> str | None:
 class Config:
     """Immutable configuration for Pollux execution.
 
-    Provider and model are required—Pollux does not guess what you want.
+    Provider is always required; model is required for cloud providers—Pollux
+    does not guess what you want. Local OpenAI-compatible servers may omit
+    ``model`` when the server has a single configured model or does not need it.
     API keys are auto-resolved from standard environment variables.
 
     Example:
@@ -67,7 +69,7 @@ class Config:
     """
 
     provider: ProviderName
-    model: str
+    model: str | None = None
     #: Auto-resolved from the provider-specific API key env var when *None*.
     #: Optional for ``provider="local"``.
     api_key: str | None = None
@@ -76,6 +78,9 @@ class Config:
     base_url: str | None = None
     use_mock: bool = False
     request_concurrency: int = 6
+    #: HTTP request timeout in seconds for providers that own their transport.
+    #: Currently applied by the local OpenAI-compatible provider.
+    request_timeout_s: float = 300.0
     retry: RetryPolicy = field(default_factory=RetryPolicy)
     #: Optional capability declarations that override the provider's static
     #: capabilities for this config (v2 interaction path). A declared capability
@@ -106,6 +111,16 @@ def __post_init__(self) -> None:
                 f"request_concurrency must be ≥ 1, got {self.request_concurrency}",
                 hint="This controls how many API calls run in parallel.",
             )
+        if not isinstance(self.request_timeout_s, int | float):
+            raise ConfigurationError(
+                f"request_timeout_s must be numeric, got {type(self.request_timeout_s).__name__}",
+                hint="Pass a timeout in seconds, for example request_timeout_s=600.",
+            )
+        if self.request_timeout_s <= 0:
+            raise ConfigurationError(
+                f"request_timeout_s must be > 0, got {self.request_timeout_s}",
+                hint="Pass a positive timeout in seconds.",
+            )
 
         if self.provider == "local":
             # Local: resolve base_url for real calls, skip API-key resolution entirely.
@@ -123,6 +138,12 @@ def __post_init__(self) -> None:
                 )
             return
 
+        if not isinstance(self.model, str) or not self.model:
+            raise ConfigurationError(
+                f"model required for provider={self.provider!r}",
+                hint="Pass the provider model name, for example model='gpt-5-nano'.",
+            )
+
         # Cloud providers: base_url is not a meaningful override here.
         if self.base_url is not None:
             raise ConfigurationError(

diff --git a/src/pollux/deferred.py b/src/pollux/deferred.py
@@ -205,6 +205,11 @@ async def submit_deferred(
         if provider_handle.submitted_at is not None
         else time.time()
     )
+    if config.model is None:
+        raise ConfigurationError(
+            "defer() requires a configured model",
+            hint="Pass Config(model=...) for provider-side deferred jobs.",
+        )
     return DeferredHandle(
         job_id=provider_handle.job_id,
         provider=config.provider,

diff --git a/src/pollux/providers/anthropic.py b/src/pollux/providers/anthropic.py
@@ -7,7 +7,7 @@
 import inspect
 import json
 import logging
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, cast
 
 from pollux.errors import APIError, ConfigurationError
 from pollux.interaction.tools import ToolCallDelta
@@ -255,7 +255,7 @@ def _build_messages_create_kwargs(
         config: Config,
     ) -> dict[str, Any]:
         """Build the raw Anthropic Messages API request body."""
-        model = config.model
+        model = cast("str", config.model)
         history, _previous_response_id, provider_state = _compile.prior_turns(input)
         messages = self._build_messages(parts, history or None, provider_state)
         system_instruction = _compile.system_instruction(snapshot)
@@ -556,9 +556,10 @@ async def validate_request(
             requirements.reasoning_effort is not None
             or requirements.reasoning_budget_tokens is not None
         )
-        if wants_reasoning and _model_lacks_extended_thinking(config.model):
+        model = cast("str", config.model)
+        if wants_reasoning and _model_lacks_extended_thinking(model):
             raise ConfigurationError(
-                f"Model {config.model!r} does not support extended thinking",
+                f"Model {model!r} does not support extended thinking",
                 hint=(
                     "Remove reasoning_effort/reasoning_budget_tokens, or use a "
                     "model with extended thinking (Claude 3.7 or 4.x)."