diff --git a/core/src/llm/anthropic.rs b/core/src/llm/anthropic.rs
index 700d269..b1ba650 100644
--- a/core/src/llm/anthropic.rs
+++ b/core/src/llm/anthropic.rs
@@ -1,6 +1,7 @@
 //! Anthropic Claude LLM client
 
 use super::http::{default_http_client, normalize_base_url, HttpClient};
+use super::structured;
 use super::types::*;
 use super::LlmClient;
 use crate::retry::{AttemptOutcome, RetryConfig};
@@ -152,17 +153,24 @@ impl AnthropicClient {
     }
 }
 
-#[async_trait]
-impl LlmClient for AnthropicClient {
-    async fn complete(
-        &self,
-        messages: &[Message],
-        system: Option<&str>,
-        tools: &[ToolDefinition],
-    ) -> Result<LlmResponse> {
+impl AnthropicClient {
+    /// Apply a structured-output directive to an Anthropic request.
+    ///
+    /// Anthropic supports forced tool choice (`tool_choice`) but has no
+    /// `response_format`, so only `force_tool` is honored.
+    fn apply_directive(
+        request: &mut serde_json::Value,
+        directive: &structured::StructuredDirective,
+    ) {
+        if let Some(tool) = &directive.force_tool {
+            request["tool_choice"] = serde_json::json!({ "type": "tool", "name": tool });
+        }
+    }
+
+    /// Execute a fully-built (non-streaming) request body.
+    async fn send_request(&self, request_body: serde_json::Value) -> Result<LlmResponse> {
         {
             let request_started_at = Instant::now();
-            let request_body = self.build_request(messages, system, tools);
             let url = format!("{}/v1/messages", self.base_url);
 
             let headers = vec![
@@ -259,6 +267,35 @@ impl LlmClient for AnthropicClient {
             Ok(llm_response)
         }
     }
+}
+
+#[async_trait]
+impl LlmClient for AnthropicClient {
+    async fn complete(
+        &self,
+        messages: &[Message],
+        system: Option<&str>,
+        tools: &[ToolDefinition],
+    ) -> Result<LlmResponse> {
+        self.send_request(self.build_request(messages, system, tools))
+            .await
+    }
+
+    async fn complete_structured(
+        &self,
+        messages: &[Message],
+        system: Option<&str>,
+        tools: &[ToolDefinition],
+        directive: &structured::StructuredDirective,
+    ) -> Result<LlmResponse> {
+        let mut request_body = self.build_request(messages, system, tools);
+        Self::apply_directive(&mut request_body, directive);
+        self.send_request(request_body).await
+    }
+
+    fn native_structured_support(&self) -> structured::NativeStructuredSupport {
+        structured::NativeStructuredSupport::ForcedTool
+    }
 
     async fn complete_streaming(
         &self,
@@ -266,10 +303,34 @@ impl LlmClient for AnthropicClient {
         system: Option<&str>,
         tools: &[ToolDefinition],
         cancel_token: CancellationToken,
+    ) -> Result<mpsc::Receiver<StreamEvent>> {
+        self.send_streaming(self.build_request(messages, system, tools), cancel_token)
+            .await
+    }
+
+    async fn complete_streaming_structured(
+        &self,
+        messages: &[Message],
+        system: Option<&str>,
+        tools: &[ToolDefinition],
+        directive: &structured::StructuredDirective,
+        cancel_token: CancellationToken,
+    ) -> Result<mpsc::Receiver<StreamEvent>> {
+        let mut request_body = self.build_request(messages, system, tools);
+        Self::apply_directive(&mut request_body, directive);
+        self.send_streaming(request_body, cancel_token).await
+    }
+}
+
+impl AnthropicClient {
+    /// Execute a fully-built streaming request body (sets `stream: true`).
+    async fn send_streaming(
+        &self,
+        mut request_body: serde_json::Value,
+        cancel_token: CancellationToken,
     ) -> Result<mpsc::Receiver<StreamEvent>> {
         {
             let request_started_at = Instant::now();
-            let mut request_body = self.build_request(messages, system, tools);
             request_body["stream"] = serde_json::json!(true);
 
             let url = format!("{}/v1/messages", self.base_url);
@@ -739,4 +800,40 @@ mod tests {
         assert_eq!(req["max_tokens"], 16_000);
         assert_eq!(req["thinking"]["budget_tokens"], 8_000);
     }
+
+    #[test]
+    fn test_apply_directive_forces_tool_choice() {
+        let mut req = serde_json::json!({ "model": "m", "messages": [] });
+        let directive = structured::StructuredDirective {
+            force_tool: Some("emit_person".to_string()),
+            response_format: None,
+        };
+        AnthropicClient::apply_directive(&mut req, &directive);
+        assert_eq!(req["tool_choice"]["type"], "tool");
+        assert_eq!(req["tool_choice"]["name"], "emit_person");
+    }
+
+    #[test]
+    fn test_apply_directive_ignores_response_format() {
+        // Anthropic has no response_format; both a response_format-only and an
+        // empty directive must be no-ops.
+        let mut req = serde_json::json!({ "model": "m" });
+        AnthropicClient::apply_directive(
+            &mut req,
+            &structured::StructuredDirective {
+                force_tool: None,
+                response_format: Some(structured::ResponseFormat::JsonObject),
+            },
+        );
+        assert!(req.get("response_format").is_none());
+        assert!(req.get("tool_choice").is_none());
+    }
+
+    #[test]
+    fn test_native_structured_support_is_forced_tool() {
+        assert_eq!(
+            make_client().native_structured_support(),
+            structured::NativeStructuredSupport::ForcedTool
+        );
+    }
 }
diff --git a/core/src/llm/mod.rs b/core/src/llm/mod.rs
index 27b3443..9b8d460 100644
--- a/core/src/llm/mod.rs
+++ b/core/src/llm/mod.rs
@@ -48,6 +48,44 @@ pub trait LlmClient: Send + Sync {
         tools: &[ToolDefinition],
         cancel_token: CancellationToken,
     ) -> Result<mpsc::Receiver<StreamEvent>>;
+
+    /// Report the strongest provider-native structured-output enforcement this
+    /// client supports. Used by [`structured`] to decide whether to force a
+    /// tool call, request a native `response_format`, or fall back to
+    /// prompt-and-parse. Defaults to no native support.
+    fn native_structured_support(&self) -> structured::NativeStructuredSupport {
+        structured::NativeStructuredSupport::None
+    }
+
+    /// Complete a conversation while honoring a structured-output directive
+    /// (forced `tool_choice` and/or native `response_format`).
+    ///
+    /// The default implementation ignores the directive and behaves exactly
+    /// like [`LlmClient::complete`], so existing clients keep working unchanged;
+    /// providers that support native structured output override this.
+    async fn complete_structured(
+        &self,
+        messages: &[Message],
+        system: Option<&str>,
+        tools: &[ToolDefinition],
+        _directive: &structured::StructuredDirective,
+    ) -> Result<LlmResponse> {
+        self.complete(messages, system, tools).await
+    }
+
+    /// Streaming counterpart of [`LlmClient::complete_structured`]. Defaults to
+    /// [`LlmClient::complete_streaming`], ignoring the directive.
+    async fn complete_streaming_structured(
+        &self,
+        messages: &[Message],
+        system: Option<&str>,
+        tools: &[ToolDefinition],
+        _directive: &structured::StructuredDirective,
+        cancel_token: CancellationToken,
+    ) -> Result<mpsc::Receiver<StreamEvent>> {
+        self.complete_streaming(messages, system, tools, cancel_token)
+            .await
+    }
 }
 
 // Include test modules — these reference internal types via crate paths
diff --git a/core/src/llm/openai.rs b/core/src/llm/openai.rs
index 67acfa2..85d9f57 100644
--- a/core/src/llm/openai.rs
+++ b/core/src/llm/openai.rs
@@ -1,6 +1,7 @@
 //! OpenAI-compatible LLM client
 
 use super::http::{default_http_client, normalize_base_url, HttpClient};
+use super::structured;
 use super::types::*;
 use super::LlmClient;
 use crate::llm::types::{ToolResultContent, ToolResultContentField};
@@ -282,43 +283,80 @@ impl OpenAiClient {
     }
 }
 
-#[async_trait]
-impl LlmClient for OpenAiClient {
-    async fn complete(
+impl OpenAiClient {
+    /// Apply a structured-output directive to an OpenAI-compatible request.
+    ///
+    /// OpenAI-compatible APIs support both forced function `tool_choice` and
+    /// native `response_format` (`json_object` / `json_schema` + `strict`).
+    fn apply_directive(
+        request: &mut serde_json::Value,
+        directive: &structured::StructuredDirective,
+    ) {
+        if let Some(tool) = &directive.force_tool {
+            request["tool_choice"] = serde_json::json!({
+                "type": "function",
+                "function": { "name": tool }
+            });
+        }
+        if let Some(rf) = &directive.response_format {
+            request["response_format"] = match rf {
+                structured::ResponseFormat::JsonObject => {
+                    serde_json::json!({ "type": "json_object" })
+                }
+                structured::ResponseFormat::JsonSchema { name, schema } => serde_json::json!({
+                    "type": "json_schema",
+                    "json_schema": { "name": name, "schema": schema, "strict": true }
+                }),
+            };
+        }
+    }
+
+    /// Build a chat-completions request body, optionally applying a directive.
+    fn build_chat_request(
         &self,
         messages: &[Message],
         system: Option<&str>,
         tools: &[ToolDefinition],
-    ) -> Result<LlmResponse> {
-        {
-            let request_started_at = Instant::now();
-            let mut openai_messages = Vec::new();
+        directive: Option<&structured::StructuredDirective>,
+    ) -> serde_json::Value {
+        let mut openai_messages = Vec::new();
+
+        if let Some(sys) = system {
+            openai_messages.push(serde_json::json!({
+                "role": "system",
+                "content": sys,
+            }));
+        }
 
-            if let Some(sys) = system {
-                openai_messages.push(serde_json::json!({
-                    "role": "system",
-                    "content": sys,
-                }));
-            }
+        openai_messages.extend(self.convert_messages(messages));
 
-            openai_messages.extend(self.convert_messages(messages));
+        let mut request = serde_json::json!({
+            "model": self.model,
+            "messages": openai_messages,
+        });
 
-            let mut request = serde_json::json!({
-                "model": self.model,
-                "messages": openai_messages,
-            });
+        if let Some(temp) = self.temperature {
+            request["temperature"] = serde_json::json!(temp);
+        }
+        if let Some(max) = self.max_tokens {
+            request["max_tokens"] = serde_json::json!(max);
+        }
 
-            if let Some(temp) = self.temperature {
-                request["temperature"] = serde_json::json!(temp);
-            }
-            if let Some(max) = self.max_tokens {
-                request["max_tokens"] = serde_json::json!(max);
-            }
+        if !tools.is_empty() {
+            request["tools"] = serde_json::json!(self.convert_tools(tools));
+        }
 
-            if !tools.is_empty() {
-                request["tools"] = serde_json::json!(self.convert_tools(tools));
-            }
+        if let Some(directive) = directive {
+            Self::apply_directive(&mut request, directive);
+        }
 
+        request
+    }
+
+    /// Execute a fully-built (non-streaming) chat-completions request.
+    async fn send_request(&self, request: serde_json::Value) -> Result<LlmResponse> {
+        {
+            let request_started_at = Instant::now();
             let url = format!("{}{}", self.base_url, self.chat_completions_path);
             let request_headers = self.request_headers();
 
@@ -442,45 +480,76 @@ impl LlmClient for OpenAiClient {
             Ok(llm_response)
         }
     }
+}
 
-    async fn complete_streaming(
+#[async_trait]
+impl LlmClient for OpenAiClient {
+    async fn complete(
         &self,
         messages: &[Message],
         system: Option<&str>,
         tools: &[ToolDefinition],
-        cancel_token: tokio_util::sync::CancellationToken,
-    ) -> Result<mpsc::Receiver<StreamEvent>> {
-        {
-            let request_started_at = Instant::now();
-            let mut openai_messages = Vec::new();
-
-            if let Some(sys) = system {
-                openai_messages.push(serde_json::json!({
-                    "role": "system",
-                    "content": sys,
-                }));
-            }
+    ) -> Result<LlmResponse> {
+        self.send_request(self.build_chat_request(messages, system, tools, None))
+            .await
+    }
 
-            openai_messages.extend(self.convert_messages(messages));
+    async fn complete_structured(
+        &self,
+        messages: &[Message],
+        system: Option<&str>,
+        tools: &[ToolDefinition],
+        directive: &structured::StructuredDirective,
+    ) -> Result<LlmResponse> {
+        self.send_request(self.build_chat_request(messages, system, tools, Some(directive)))
+            .await
+    }
 
-            let mut request = serde_json::json!({
-                "model": self.model,
-                "messages": openai_messages,
-                "stream": true,
-                "stream_options": { "include_usage": true },
-            });
+    fn native_structured_support(&self) -> structured::NativeStructuredSupport {
+        structured::NativeStructuredSupport::JsonSchema
+    }
 
-            if let Some(temp) = self.temperature {
-                request["temperature"] = serde_json::json!(temp);
-            }
-            if let Some(max) = self.max_tokens {
-                request["max_tokens"] = serde_json::json!(max);
-            }
+    async fn complete_streaming(
+        &self,
+        messages: &[Message],
+        system: Option<&str>,
+        tools: &[ToolDefinition],
+        cancel_token: tokio_util::sync::CancellationToken,
+    ) -> Result<mpsc::Receiver<StreamEvent>> {
+        self.send_streaming(
+            self.build_chat_request(messages, system, tools, None),
+            cancel_token,
+        )
+        .await
+    }
 
-            if !tools.is_empty() {
-                request["tools"] = serde_json::json!(self.convert_tools(tools));
-            }
+    async fn complete_streaming_structured(
+        &self,
+        messages: &[Message],
+        system: Option<&str>,
+        tools: &[ToolDefinition],
+        directive: &structured::StructuredDirective,
+        cancel_token: tokio_util::sync::CancellationToken,
+    ) -> Result<mpsc::Receiver<StreamEvent>> {
+        self.send_streaming(
+            self.build_chat_request(messages, system, tools, Some(directive)),
+            cancel_token,
+        )
+        .await
+    }
+}
 
+impl OpenAiClient {
+    /// Execute a fully-built streaming chat-completions request (sets `stream`).
+    async fn send_streaming(
+        &self,
+        mut request: serde_json::Value,
+        cancel_token: tokio_util::sync::CancellationToken,
+    ) -> Result<mpsc::Receiver<StreamEvent>> {
+        {
+            request["stream"] = serde_json::json!(true);
+            request["stream_options"] = serde_json::json!({ "include_usage": true });
+            let request_started_at = Instant::now();
             let url = format!("{}{}", self.base_url, self.chat_completions_path);
             let request_headers = self.request_headers();
 
@@ -1107,3 +1176,99 @@ pub(crate) struct OpenAiFunctionDelta {
     pub(crate) name: Option<String>,
     pub(crate) arguments: Option<String>,
 }
+
+// ============================================================================
+// Tests
+// ============================================================================
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::llm::types::{Message, ToolDefinition};
+
+    fn make_client() -> OpenAiClient {
+        OpenAiClient::new("test-key".to_string(), "gpt-test".to_string())
+    }
+
+    #[test]
+    fn test_apply_directive_forced_function_tool_choice() {
+        let mut req = serde_json::json!({ "model": "m" });
+        OpenAiClient::apply_directive(
+            &mut req,
+            &structured::StructuredDirective {
+                force_tool: Some("emit_person".to_string()),
+                response_format: None,
+            },
+        );
+        assert_eq!(req["tool_choice"]["type"], "function");
+        assert_eq!(req["tool_choice"]["function"]["name"], "emit_person");
+        assert!(req.get("response_format").is_none());
+    }
+
+    #[test]
+    fn test_apply_directive_json_schema_strict() {
+        let mut req = serde_json::json!({});
+        OpenAiClient::apply_directive(
+            &mut req,
+            &structured::StructuredDirective {
+                force_tool: None,
+                response_format: Some(structured::ResponseFormat::JsonSchema {
+                    name: "person".to_string(),
+                    schema: serde_json::json!({ "type": "object" }),
+                }),
+            },
+        );
+        assert_eq!(req["response_format"]["type"], "json_schema");
+        assert_eq!(req["response_format"]["json_schema"]["name"], "person");
+        assert_eq!(req["response_format"]["json_schema"]["strict"], true);
+        assert!(req.get("tool_choice").is_none());
+    }
+
+    #[test]
+    fn test_apply_directive_json_object() {
+        let mut req = serde_json::json!({});
+        OpenAiClient::apply_directive(
+            &mut req,
+            &structured::StructuredDirective {
+                force_tool: None,
+                response_format: Some(structured::ResponseFormat::JsonObject),
+            },
+        );
+        assert_eq!(req["response_format"]["type"], "json_object");
+    }
+
+    #[test]
+    fn test_build_chat_request_applies_directive_and_system() {
+        let req = make_client().build_chat_request(
+            &[Message::user("hi")],
+            Some("sys"),
+            &[ToolDefinition {
+                name: "emit_x".to_string(),
+                description: "emit".to_string(),
+                parameters: serde_json::json!({ "type": "object" }),
+            }],
+            Some(&structured::StructuredDirective {
+                force_tool: Some("emit_x".to_string()),
+                response_format: None,
+            }),
+        );
+        assert_eq!(req["messages"][0]["role"], "system");
+        assert_eq!(req["tool_choice"]["function"]["name"], "emit_x");
+        assert_eq!(req["tools"][0]["function"]["name"], "emit_x");
+    }
+
+    #[test]
+    fn test_build_chat_request_without_directive_is_plain() {
+        let req = make_client().build_chat_request(&[Message::user("hi")], None, &[], None);
+        assert!(req.get("tool_choice").is_none());
+        assert!(req.get("response_format").is_none());
+    }
+
+    #[test]
+    fn test_native_structured_support_is_json_schema() {
+        assert_eq!(
+            make_client().native_structured_support(),
+            structured::NativeStructuredSupport::JsonSchema
+        );
+    }
+}
diff --git a/core/src/llm/structured.rs b/core/src/llm/structured.rs
index 3db0378..8043947 100644
--- a/core/src/llm/structured.rs
+++ b/core/src/llm/structured.rs
@@ -54,6 +54,50 @@ pub struct StructuredResult {
     pub mode_used: StructuredMode,
 }
 
+/// Provider-native structured-output capability.
+///
+/// Each [`LlmClient`] reports this so the structured engine can request the
+/// strongest enforcement the provider actually supports. Defaults to
+/// [`NativeStructuredSupport::None`] for clients that don't override it.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum NativeStructuredSupport {
+    /// No native enforcement — rely on prompt instructions + lenient extraction.
+    None,
+    /// Can force a specific tool call (Anthropic `tool_choice`, OpenAI function
+    /// `tool_choice`). Guarantees the model emits the structured tool call
+    /// instead of free-form prose.
+    ForcedTool,
+    /// Supports OpenAI-style `response_format` (`json_object` and
+    /// `json_schema` + `strict`) in addition to forced tool calls.
+    JsonSchema,
+}
+
+/// A native `response_format` request for OpenAI-compatible providers.
+#[derive(Debug, Clone, PartialEq)]
+pub enum ResponseFormat {
+    /// `{"type":"json_object"}` — guarantees syntactically valid JSON, but not
+    /// schema conformance.
+    JsonObject,
+    /// `{"type":"json_schema","json_schema":{name,schema,strict:true}}` —
+    /// parser-enforced schema conformance.
+    JsonSchema { name: String, schema: Value },
+}
+
+/// Instruction telling a provider how to enforce structured output for a call.
+///
+/// Carries the union of intents; each provider honors what it supports and
+/// ignores the rest (e.g. Anthropic has no `response_format`, so it only acts
+/// on `force_tool`). The default (`force_tool: None, response_format: None`)
+/// reproduces an ordinary completion, which is why the trait's default
+/// `complete_structured` impl is behavior-preserving.
+#[derive(Debug, Clone, Default, PartialEq)]
+pub struct StructuredDirective {
+    /// Force the model to call exactly this tool (provider `tool_choice`).
+    pub force_tool: Option<String>,
+    /// Request a provider-native `response_format` (OpenAI-compatible only).
+    pub response_format: Option<ResponseFormat>,
+}
+
 /// Callback for streaming partial object snapshots.
 pub type PartialObjectCallback = Box<dyn Fn(&Value) + Send>;
 
@@ -69,17 +113,18 @@ pub async fn generate_blocking(
     client: &dyn LlmClient,
     req: &StructuredRequest,
 ) -> Result<StructuredResult> {
-    let mode = req.mode;
+    let mode = resolve_mode(req.mode, client.native_structured_support());
     let mut messages = build_initial_messages(req, mode);
     let system = build_system_prompt(req, mode);
     let tools = build_tools(req, mode);
+    let directive = build_directive(req, mode);
 
     let mut total_usage = TokenUsage::default();
     let mut repair_rounds: u8 = 0;
 
     loop {
         let resp = client
-            .complete(&messages, Some(&system), &tools)
+            .complete_structured(&messages, Some(&system), &tools, &directive)
             .await
             .context("LLM call failed during structured generation")?;
 
@@ -153,14 +198,15 @@ pub async fn generate_streaming(
     req: &StructuredRequest,
     on_partial: PartialObjectCallback,
 ) -> Result<StructuredResult> {
-    let mode = req.mode;
+    let mode = resolve_mode(req.mode, client.native_structured_support());
     let messages = build_initial_messages(req, mode);
     let system = build_system_prompt(req, mode);
     let tools = build_tools(req, mode);
+    let directive = build_directive(req, mode);
 
     let cancel_token = CancellationToken::new();
     let mut rx = client
-        .complete_streaming(&messages, Some(&system), &tools, cancel_token)
+        .complete_streaming_structured(&messages, Some(&system), &tools, &directive, cancel_token)
         .await
         .context("LLM streaming call failed during structured generation")?;
 
@@ -802,6 +848,50 @@ fn value_type_name(value: &Value) -> &'static str {
 // Message/prompt construction helpers
 // ---------------------------------------------------------------------------
 
+/// Resolve the requested mode against the provider's native capability.
+///
+/// `Auto`/`Tool` always resolve to forced `Tool` mode — the most reliable
+/// cross-provider strategy (the synthetic `emit_*` tool is made mandatory via
+/// the provider's `tool_choice`). `Strict`/`Json` use native `response_format`
+/// only when the provider reports [`NativeStructuredSupport::JsonSchema`];
+/// otherwise they fall back to forced `Tool` mode rather than silently
+/// degrading to unconstrained text.
+fn resolve_mode(requested: StructuredMode, support: NativeStructuredSupport) -> StructuredMode {
+    match requested {
+        StructuredMode::Prompt => StructuredMode::Prompt,
+        StructuredMode::Strict if support == NativeStructuredSupport::JsonSchema => {
+            StructuredMode::Strict
+        }
+        StructuredMode::Json if support == NativeStructuredSupport::JsonSchema => {
+            StructuredMode::Json
+        }
+        // Auto, Tool, or Strict/Json on a provider without json_schema support.
+        _ => StructuredMode::Tool,
+    }
+}
+
+/// Build the provider directive for an already-resolved mode.
+fn build_directive(req: &StructuredRequest, mode: StructuredMode) -> StructuredDirective {
+    match mode {
+        StructuredMode::Tool => StructuredDirective {
+            force_tool: Some(format!("emit_{}", req.schema_name)),
+            response_format: None,
+        },
+        StructuredMode::Strict => StructuredDirective {
+            force_tool: None,
+            response_format: Some(ResponseFormat::JsonSchema {
+                name: req.schema_name.clone(),
+                schema: req.schema.clone(),
+            }),
+        },
+        StructuredMode::Json => StructuredDirective {
+            force_tool: None,
+            response_format: Some(ResponseFormat::JsonObject),
+        },
+        StructuredMode::Auto | StructuredMode::Prompt => StructuredDirective::default(),
+    }
+}
+
 fn build_initial_messages(req: &StructuredRequest, mode: StructuredMode) -> Vec<Message> {
     match mode {
         StructuredMode::Tool => {
@@ -809,8 +899,10 @@ fn build_initial_messages(req: &StructuredRequest, mode: StructuredMode) -> Vec<
             // with a tool call whose input is the structured object.
             vec![Message::user(&req.prompt)]
         }
-        StructuredMode::Prompt => {
-            // Append schema instructions to the user prompt
+        StructuredMode::Prompt | StructuredMode::Json => {
+            // Prompt mode and json_object mode both need the schema in the prompt:
+            // json_object only guarantees *syntactic* validity, so the model still
+            // has to be told the shape it should produce.
             let augmented = format!(
                 "{}\n\nYou MUST respond with ONLY a valid JSON object (no markdown, no explanation) that conforms to this JSON Schema:\n\n```json\n{}\n```",
                 req.prompt,
@@ -819,8 +911,8 @@ fn build_initial_messages(req: &StructuredRequest, mode: StructuredMode) -> Vec<
             vec![Message::user(&augmented)]
         }
         _ => {
-            // Strict/Json modes: the schema constraint is enforced by the provider,
-            // so the user message is just the prompt.
+            // Strict mode: the schema constraint is enforced by the provider via
+            // response_format.json_schema, so the user message is just the prompt.
             vec![Message::user(&req.prompt)]
         }
     }
@@ -838,7 +930,7 @@ fn build_system_prompt(req: &StructuredRequest, mode: StructuredMode) -> String
                 req.schema_name
             )
         }
-        StructuredMode::Prompt => {
+        StructuredMode::Prompt | StructuredMode::Json => {
             format!(
                 "{}{}You are a structured data extraction assistant. Always respond with valid JSON only, no markdown fences, no explanation text.",
                 base,
diff --git a/core/src/llm/structured_tests.rs b/core/src/llm/structured_tests.rs
index 3ba3cc0..d2d5bdf 100644
--- a/core/src/llm/structured_tests.rs
+++ b/core/src/llm/structured_tests.rs
@@ -1322,3 +1322,336 @@ async fn test_integration_generate_complex_nested_schema() {
         error["code"], result.repair_rounds
     );
 }
+
+// ========================================================================
+// Native structured-output routing (capability + directive) tests
+//
+// These verify the core stability fix: the structured engine resolves the
+// mode against the client's native capability and passes the correct
+// directive (forced tool_choice vs. response_format) to complete_structured.
+// ========================================================================
+
+/// A client that records the directive + tool surface it was asked to honor,
+/// and reports a configurable native capability.
+struct RecordingClient {
+    support: NativeStructuredSupport,
+    responses: Mutex<Vec<LlmResponse>>,
+    last_directive: Mutex<Option<StructuredDirective>>,
+    last_tool_names: Mutex<Vec<String>>,
+    structured_calls: std::sync::atomic::AtomicUsize,
+}
+
+impl RecordingClient {
+    fn new(support: NativeStructuredSupport, responses: Vec<LlmResponse>) -> Self {
+        Self {
+            support,
+            responses: Mutex::new(responses),
+            last_directive: Mutex::new(None),
+            last_tool_names: Mutex::new(Vec::new()),
+            structured_calls: std::sync::atomic::AtomicUsize::new(0),
+        }
+    }
+
+    fn pop(&self) -> anyhow::Result<LlmResponse> {
+        let mut responses = self.responses.lock().unwrap();
+        if responses.is_empty() {
+            anyhow::bail!("No more mock responses");
+        }
+        Ok(responses.remove(0))
+    }
+}
+
+#[async_trait]
+impl LlmClient for RecordingClient {
+    async fn complete(
+        &self,
+        _messages: &[Message],
+        _system: Option<&str>,
+        _tools: &[ToolDefinition],
+    ) -> anyhow::Result<LlmResponse> {
+        self.pop()
+    }
+
+    async fn complete_streaming(
+        &self,
+        _messages: &[Message],
+        _system: Option<&str>,
+        _tools: &[ToolDefinition],
+        _cancel_token: CancellationToken,
+    ) -> anyhow::Result<mpsc::Receiver<StreamEvent>> {
+        anyhow::bail!("streaming not used in routing tests")
+    }
+
+    fn native_structured_support(&self) -> NativeStructuredSupport {
+        self.support
+    }
+
+    async fn complete_structured(
+        &self,
+        _messages: &[Message],
+        _system: Option<&str>,
+        tools: &[ToolDefinition],
+        directive: &StructuredDirective,
+    ) -> anyhow::Result<LlmResponse> {
+        self.structured_calls
+            .fetch_add(1, std::sync::atomic::Ordering::SeqCst);
+        *self.last_directive.lock().unwrap() = Some(directive.clone());
+        *self.last_tool_names.lock().unwrap() = tools.iter().map(|t| t.name.clone()).collect();
+        self.pop()
+    }
+
+    async fn complete_streaming_structured(
+        &self,
+        _messages: &[Message],
+        _system: Option<&str>,
+        tools: &[ToolDefinition],
+        directive: &StructuredDirective,
+        _cancel_token: CancellationToken,
+    ) -> anyhow::Result<mpsc::Receiver<StreamEvent>> {
+        self.structured_calls
+            .fetch_add(1, std::sync::atomic::Ordering::SeqCst);
+        *self.last_directive.lock().unwrap() = Some(directive.clone());
+        *self.last_tool_names.lock().unwrap() = tools.iter().map(|t| t.name.clone()).collect();
+        let response = self.pop()?;
+        let (tx, rx) = mpsc::channel(10);
+        tokio::spawn(async move {
+            for block in &response.message.content {
+                if let ContentBlock::ToolUse { name, input, .. } = block {
+                    tx.send(StreamEvent::ToolUseStart {
+                        id: "call_001".to_string(),
+                        name: name.clone(),
+                    })
+                    .await
+                    .ok();
+                    let json_str = serde_json::to_string(input).unwrap();
+                    for chunk in json_str.as_bytes().chunks(8) {
+                        tx.send(StreamEvent::ToolUseInputDelta(
+                            String::from_utf8_lossy(chunk).to_string(),
+                        ))
+                        .await
+                        .ok();
+                    }
+                } else if let ContentBlock::Text { text } = block {
+                    for chunk in text.as_bytes().chunks(8) {
+                        tx.send(StreamEvent::TextDelta(
+                            String::from_utf8_lossy(chunk).to_string(),
+                        ))
+                        .await
+                        .ok();
+                    }
+                }
+            }
+            tx.send(StreamEvent::Done(response)).await.ok();
+        });
+        Ok(rx)
+    }
+}
+
+fn person_request(mode: StructuredMode) -> StructuredRequest {
+    StructuredRequest {
+        prompt: "Extract the person".to_string(),
+        system: None,
+        schema: serde_json::json!({
+            "type": "object",
+            "required": ["name"],
+            "properties": { "name": { "type": "string" } }
+        }),
+        schema_name: "person".to_string(),
+        schema_description: None,
+        mode,
+        max_repair_attempts: 0,
+    }
+}
+
+#[tokio::test]
+async fn test_routing_tool_mode_forces_tool_choice() {
+    let client = RecordingClient::new(
+        NativeStructuredSupport::ForcedTool,
+        vec![MockStructuredClient::tool_call_response(
+            "emit_person",
+            serde_json::json!({ "name": "Bob" }),
+        )],
+    );
+    let result = generate_blocking(&client, &person_request(StructuredMode::Tool))
+        .await
+        .unwrap();
+
+    assert_eq!(result.object["name"], "Bob");
+    assert_eq!(result.mode_used, StructuredMode::Tool);
+    assert_eq!(
+        client
+            .structured_calls
+            .load(std::sync::atomic::Ordering::SeqCst),
+        1
+    );
+
+    let directive = client.last_directive.lock().unwrap().clone().unwrap();
+    assert_eq!(directive.force_tool.as_deref(), Some("emit_person"));
+    assert!(directive.response_format.is_none());
+    // The synthetic emit tool must be present in the tool surface.
+    assert_eq!(
+        client.last_tool_names.lock().unwrap().as_slice(),
+        &["emit_person".to_string()]
+    );
+}
+
+#[tokio::test]
+async fn test_routing_auto_collapses_to_forced_tool() {
+    let client = RecordingClient::new(
+        NativeStructuredSupport::JsonSchema,
+        vec![MockStructuredClient::tool_call_response(
+            "emit_person",
+            serde_json::json!({ "name": "Bob" }),
+        )],
+    );
+    let result = generate_blocking(&client, &person_request(StructuredMode::Auto))
+        .await
+        .unwrap();
+
+    assert_eq!(result.mode_used, StructuredMode::Tool);
+    let directive = client.last_directive.lock().unwrap().clone().unwrap();
+    assert_eq!(directive.force_tool.as_deref(), Some("emit_person"));
+}
+
+#[tokio::test]
+async fn test_routing_strict_uses_response_format_when_supported() {
+    let client = RecordingClient::new(
+        NativeStructuredSupport::JsonSchema,
+        vec![MockStructuredClient::text_response(r#"{"name": "Bob"}"#)],
+    );
+    let result = generate_blocking(&client, &person_request(StructuredMode::Strict))
+        .await
+        .unwrap();
+
+    assert_eq!(result.object["name"], "Bob");
+    assert_eq!(result.mode_used, StructuredMode::Strict);
+
+    let directive = client.last_directive.lock().unwrap().clone().unwrap();
+    assert!(directive.force_tool.is_none());
+    match directive.response_format {
+        Some(ResponseFormat::JsonSchema {
+            ref name,
+            ref schema,
+        }) => {
+            assert_eq!(name, "person");
+            assert_eq!(schema["required"][0], "name");
+        }
+        other => panic!("expected json_schema response_format, got {:?}", other),
+    }
+    // Strict mode must not inject a tool.
+    assert!(client.last_tool_names.lock().unwrap().is_empty());
+}
+
+#[tokio::test]
+async fn test_routing_strict_falls_back_to_tool_without_support() {
+    // A provider with no native json_schema support must NOT silently degrade
+    // to unconstrained text — it falls back to forced tool mode.
+    let client = RecordingClient::new(
+        NativeStructuredSupport::ForcedTool,
+        vec![MockStructuredClient::tool_call_response(
+            "emit_person",
+            serde_json::json!({ "name": "Bob" }),
+        )],
+    );
+    let result = generate_blocking(&client, &person_request(StructuredMode::Strict))
+        .await
+        .unwrap();
+
+    assert_eq!(result.mode_used, StructuredMode::Tool);
+    let directive = client.last_directive.lock().unwrap().clone().unwrap();
+    assert_eq!(directive.force_tool.as_deref(), Some("emit_person"));
+    assert!(directive.response_format.is_none());
+}
+
+#[tokio::test]
+async fn test_routing_json_uses_json_object_when_supported() {
+    let client = RecordingClient::new(
+        NativeStructuredSupport::JsonSchema,
+        vec![MockStructuredClient::text_response(r#"{"name": "Bob"}"#)],
+    );
+    let result = generate_blocking(&client, &person_request(StructuredMode::Json))
+        .await
+        .unwrap();
+
+    assert_eq!(result.mode_used, StructuredMode::Json);
+    let directive = client.last_directive.lock().unwrap().clone().unwrap();
+    assert_eq!(directive.response_format, Some(ResponseFormat::JsonObject));
+    assert!(directive.force_tool.is_none());
+}
+
+#[tokio::test]
+async fn test_streaming_routing_tool_mode_forces_tool_choice() {
+    // The streaming path must also force the directive (via
+    // complete_streaming_structured), not silently drop it.
+    let client = RecordingClient::new(
+        NativeStructuredSupport::ForcedTool,
+        vec![MockStructuredClient::tool_call_response(
+            "emit_person",
+            serde_json::json!({ "name": "Bob" }),
+        )],
+    );
+
+    let partials = Arc::new(Mutex::new(0usize));
+    let partials_cb = partials.clone();
+    let result = generate_streaming(
+        &client,
+        &person_request(StructuredMode::Tool),
+        Box::new(move |_partial| {
+            *partials_cb.lock().unwrap() += 1;
+        }),
+    )
+    .await
+    .unwrap();
+
+    assert_eq!(result.object["name"], "Bob");
+    assert_eq!(result.mode_used, StructuredMode::Tool);
+    let directive = client.last_directive.lock().unwrap().clone().unwrap();
+    assert_eq!(directive.force_tool.as_deref(), Some("emit_person"));
+    assert!(
+        *partials.lock().unwrap() >= 1,
+        "on_partial should fire at least once (final object)"
+    );
+}
+
+// ========================================================================
+// Adversarial JSON extraction edge cases
+// ========================================================================
+
+#[test]
+fn test_extract_json_crlf_code_fence() {
+    let input = "```json\r\n{\"a\": 1}\r\n```";
+    let result = extract_json_value(input).unwrap();
+    assert_eq!(result["a"], 1);
+}
+
+#[test]
+fn test_extract_json_prose_with_brace_in_string() {
+    // A `}` inside a string value, plus trailing prose: balanced extraction
+    // must keep the in-string brace and stop at the real closing brace.
+    let input = "Sure thing: {\"msg\": \"close with } please\"} — done.";
+    let result = extract_json_value(input).unwrap();
+    assert_eq!(result["msg"], "close with } please");
+}
+
+#[test]
+fn test_extract_json_single_quotes_rejected() {
+    // Single-quoted pseudo-JSON is NOT valid JSON; extraction must fail rather
+    // than silently mis-parse. (Documents current, intentional behavior.)
+    assert!(extract_json_value("{'name': 'Bob'}").is_err());
+}
+
+#[test]
+fn test_extract_raw_output_tool_mode_falls_back_to_text() {
+    // If the model ignores the forced tool and replies with text anyway, tool
+    // mode must fall back to reading the message text.
+    let msg = Message {
+        role: "assistant".to_string(),
+        content: vec![ContentBlock::Text {
+            text: r#"{"name": "Bob"}"#.to_string(),
+        }],
+        reasoning_content: None,
+    };
+    let raw = extract_raw_output(&msg, StructuredMode::Tool);
+    let value = extract_json_value(&raw).unwrap();
+    assert_eq!(value["name"], "Bob");
+}
diff --git a/core/src/llm/zhipu.rs b/core/src/llm/zhipu.rs
index 2ad1218..47f052a 100644
--- a/core/src/llm/zhipu.rs
+++ b/core/src/llm/zhipu.rs
@@ -4,6 +4,7 @@
 //! This client wraps `OpenAiClient` with the correct GLM defaults.
 
 use super::openai::OpenAiClient;
+use super::structured;
 use super::types::*;
 use super::LlmClient;
 use crate::retry::RetryConfig;
@@ -79,4 +80,33 @@ impl LlmClient for ZhipuClient {
             .complete_streaming(messages, system, tools, cancel_token)
             .await
     }
+
+    fn native_structured_support(&self) -> structured::NativeStructuredSupport {
+        self.0.native_structured_support()
+    }
+
+    async fn complete_structured(
+        &self,
+        messages: &[Message],
+        system: Option<&str>,
+        tools: &[ToolDefinition],
+        directive: &structured::StructuredDirective,
+    ) -> Result<LlmResponse> {
+        self.0
+            .complete_structured(messages, system, tools, directive)
+            .await
+    }
+
+    async fn complete_streaming_structured(
+        &self,
+        messages: &[Message],
+        system: Option<&str>,
+        tools: &[ToolDefinition],
+        directive: &structured::StructuredDirective,
+        cancel_token: CancellationToken,
+    ) -> Result<mpsc::Receiver<StreamEvent>> {
+        self.0
+            .complete_streaming_structured(messages, system, tools, directive, cancel_token)
+            .await
+    }
 }
diff --git a/core/src/planning/llm_planner.rs b/core/src/planning/llm_planner.rs
index 11f7f50..e1a4669 100644
--- a/core/src/planning/llm_planner.rs
+++ b/core/src/planning/llm_planner.rs
@@ -215,19 +215,41 @@ impl LlmPlanner {
     pub async fn pre_analyze(llm: &Arc<dyn LlmClient>, prompt: &str) -> Result<PreAnalysis> {
         let system = crate::prompts::PRE_ANALYSIS_SYSTEM;
 
-        let messages = vec![Message::user(prompt)];
-        let response = llm
-            .complete(&messages, Some(system), &[])
-            .await
-            .context("LLM pre-analysis call failed")?;
+        // One initial attempt plus one repair round: if the model returns
+        // unparseable JSON, re-prompt it once to emit strictly valid JSON before
+        // giving up (callers fall back to heuristics on the returned error).
+        const MAX_ATTEMPTS: usize = 2;
+        let mut messages = vec![Message::user(prompt)];
+        let mut last_err: Option<anyhow::Error> = None;
+
+        for attempt in 0..MAX_ATTEMPTS {
+            let response = llm
+                .complete(&messages, Some(system), &[])
+                .await
+                .context("LLM pre-analysis call failed")?;
+
+            let text = response.text();
+            match Self::parse_pre_analysis_response(&text, prompt) {
+                Ok(analysis) => return Ok(analysis),
+                Err(e) => {
+                    last_err = Some(e);
+                    if attempt + 1 < MAX_ATTEMPTS {
+                        messages.push(response.message.clone());
+                        messages.push(Message::user(
+                            "Your previous response was not valid JSON matching the required \
+                             schema. Respond again with ONLY the JSON object — no markdown \
+                             fences, no prose, no explanation.",
+                        ));
+                    }
+                }
+            }
+        }
 
-        let text = response.text();
-        Self::parse_pre_analysis_response(&text, prompt)
+        Err(last_err.unwrap_or_else(|| anyhow::anyhow!("pre-analysis produced no result")))
     }
 
     fn parse_pre_analysis_response(text: &str, original_prompt: &str) -> Result<PreAnalysis> {
-        let cleaned = Self::extract_json(text);
-        let parsed: PreAnalysisResponse = serde_json::from_str(cleaned)
+        let parsed: PreAnalysisResponse = Self::parse_json_lenient(text)
             .context("Failed to parse pre-analysis JSON from LLM response")?;
 
         let intent = match parsed.intent.to_lowercase().as_str() {
@@ -286,9 +308,8 @@ impl LlmPlanner {
     // ========================================================================
 
     fn parse_plan_response(text: &str) -> Result<ExecutionPlan> {
-        let cleaned = Self::extract_json(text);
-        let parsed: PlanResponse =
-            serde_json::from_str(cleaned).context("Failed to parse plan JSON from LLM response")?;
+        let parsed: PlanResponse = Self::parse_json_lenient(text)
+            .context("Failed to parse plan JSON from LLM response")?;
 
         let complexity = match parsed.complexity.as_str() {
             "Simple" => Complexity::Simple,
@@ -322,16 +343,14 @@ impl LlmPlanner {
     }
 
     fn parse_goal_response(text: &str) -> Result<AgentGoal> {
-        let cleaned = Self::extract_json(text);
-        let parsed: GoalResponse =
-            serde_json::from_str(cleaned).context("Failed to parse goal JSON from LLM response")?;
+        let parsed: GoalResponse = Self::parse_json_lenient(text)
+            .context("Failed to parse goal JSON from LLM response")?;
 
         Ok(AgentGoal::new(parsed.description).with_criteria(parsed.success_criteria))
     }
 
     fn parse_achievement_response(text: &str) -> Result<AchievementResult> {
-        let cleaned = Self::extract_json(text);
-        let parsed: AchievementResponse = serde_json::from_str(cleaned)
+        let parsed: AchievementResponse = Self::parse_json_lenient(text)
             .context("Failed to parse achievement JSON from LLM response")?;
 
         Ok(AchievementResult {
@@ -341,20 +360,15 @@ impl LlmPlanner {
         })
     }
 
-    /// Extract JSON from LLM text that may contain markdown fences
-    fn extract_json(text: &str) -> &str {
-        let trimmed = text.trim();
-
-        // Strip markdown code fences if present
-        if let Some(start) = trimmed.find('{') {
-            if let Some(end) = trimmed.rfind('}') {
-                if start <= end {
-                    return &trimmed[start..=end];
-                }
-            }
-        }
-
-        trimmed
+    /// Parse JSON from possibly-dirty LLM output into the target type.
+    ///
+    /// Uses the shared robust extractor ([`crate::llm::structured::extract_json_value`]),
+    /// which handles ```json fences, surrounding prose, and braces embedded in
+    /// strings — unlike the previous naive first-`{`/last-`}` slice, which broke
+    /// on fenced output or any `}` inside a string value.
+    fn parse_json_lenient<T: serde::de::DeserializeOwned>(text: &str) -> Result<T> {
+        let value = crate::llm::structured::extract_json_value(text)?;
+        Ok(serde_json::from_value(value)?)
     }
 }
 
@@ -521,19 +535,125 @@ mod tests {
     }
 
     #[test]
-    fn test_extract_json_plain() {
-        assert_eq!(LlmPlanner::extract_json("  {\"a\": 1}  "), "{\"a\": 1}");
+    fn test_parse_json_lenient_plain() {
+        let v: serde_json::Value = LlmPlanner::parse_json_lenient("  {\"a\": 1}  ").unwrap();
+        assert_eq!(v["a"], 1);
     }
 
     #[test]
-    fn test_extract_json_with_fences() {
+    fn test_parse_json_lenient_with_fences() {
         let text = "```json\n{\"a\": 1}\n```";
-        assert_eq!(LlmPlanner::extract_json(text), "{\"a\": 1}");
+        let v: serde_json::Value = LlmPlanner::parse_json_lenient(text).unwrap();
+        assert_eq!(v["a"], 1);
     }
 
     #[test]
-    fn test_extract_json_with_surrounding_text() {
+    fn test_parse_json_lenient_with_surrounding_prose() {
         let text = "Here is the plan:\n{\"goal\": \"test\"}\nDone.";
-        assert_eq!(LlmPlanner::extract_json(text), "{\"goal\": \"test\"}");
+        let v: serde_json::Value = LlmPlanner::parse_json_lenient(text).unwrap();
+        assert_eq!(v["goal"], "test");
+    }
+
+    #[test]
+    fn test_parse_json_lenient_brace_inside_string_value() {
+        // The old naive first-`{`/last-`}` slice broke when a string value
+        // contained a `}` followed by trailing prose; the robust extractor
+        // balances braces while respecting string boundaries.
+        let text = "Result: {\"note\": \"use a closing brace } here\"} -- end.";
+        let v: serde_json::Value = LlmPlanner::parse_json_lenient(text).unwrap();
+        assert_eq!(v["note"], "use a closing brace } here");
+    }
+
+    #[test]
+    fn test_parse_json_lenient_fenced_with_trailing_prose() {
+        // ```json fence followed by an explanation. The naive parser's
+        // `rfind('}')` could grab a brace from the trailing prose.
+        let text = "```json\n{\"goal\": \"ship\"}\n```\nNote: revisit the `plan` later.";
+        let v: serde_json::Value = LlmPlanner::parse_json_lenient(text).unwrap();
+        assert_eq!(v["goal"], "ship");
+    }
+
+    #[test]
+    fn test_parse_json_lenient_rejects_non_json() {
+        let err = LlmPlanner::parse_json_lenient::<serde_json::Value>("no json here at all");
+        assert!(err.is_err());
+    }
+
+    /// Replays a fixed sequence of assistant text responses, one per call.
+    struct ReplayClient {
+        responses: std::sync::Mutex<Vec<String>>,
+    }
+
+    impl ReplayClient {
+        fn new(responses: Vec<String>) -> Self {
+            Self {
+                responses: std::sync::Mutex::new(responses),
+            }
+        }
+    }
+
+    #[async_trait::async_trait]
+    impl LlmClient for ReplayClient {
+        async fn complete(
+            &self,
+            _messages: &[Message],
+            _system: Option<&str>,
+            _tools: &[crate::llm::ToolDefinition],
+        ) -> anyhow::Result<crate::llm::LlmResponse> {
+            let text = {
+                let mut r = self.responses.lock().unwrap();
+                if r.is_empty() {
+                    String::new()
+                } else {
+                    r.remove(0)
+                }
+            };
+            Ok(crate::llm::LlmResponse {
+                message: Message {
+                    role: "assistant".to_string(),
+                    content: vec![crate::llm::ContentBlock::Text { text }],
+                    reasoning_content: None,
+                },
+                usage: crate::llm::TokenUsage::default(),
+                stop_reason: None,
+                meta: None,
+            })
+        }
+
+        async fn complete_streaming(
+            &self,
+            _messages: &[Message],
+            _system: Option<&str>,
+            _tools: &[crate::llm::ToolDefinition],
+            _cancel_token: tokio_util::sync::CancellationToken,
+        ) -> anyhow::Result<tokio::sync::mpsc::Receiver<crate::llm::StreamEvent>> {
+            anyhow::bail!("streaming not used in planner tests")
+        }
+    }
+
+    #[tokio::test]
+    async fn test_pre_analyze_repairs_invalid_json() {
+        // First response is unparseable; pre_analyze must re-prompt and succeed
+        // on the second (valid) response.
+        let good = r#"{"intent":"explore","requires_planning":false,"goal":{"description":"Do x","success_criteria":["done"]},"execution_plan":{"complexity":"Simple","steps":[],"required_tools":[]},"optimized_input":"Do x carefully"}"#;
+        let client: Arc<dyn LlmClient> = Arc::new(ReplayClient::new(vec![
+            "Sorry — here's the plan, but not as JSON.".to_string(),
+            good.to_string(),
+        ]));
+        let pa = LlmPlanner::pre_analyze(&client, "do x").await.unwrap();
+        assert_eq!(pa.optimized_input, "Do x carefully");
+    }
+
+    #[tokio::test]
+    async fn test_pre_analyze_first_try_with_fenced_json() {
+        // A single ```json-fenced response must parse on the first attempt
+        // (robust extractor), with no repair round needed.
+        let good = format!(
+            "```json\n{}\n```",
+            r#"{"intent":"plan","requires_planning":true,"goal":{"description":"g","success_criteria":[]},"execution_plan":{"complexity":"Medium","steps":[],"required_tools":[]},"optimized_input":"opt"}"#
+        );
+        let client: Arc<dyn LlmClient> = Arc::new(ReplayClient::new(vec![good]));
+        let pa = LlmPlanner::pre_analyze(&client, "do x").await.unwrap();
+        assert_eq!(pa.optimized_input, "opt");
     }
 }
diff --git a/core/src/tools/builtin/generate_object.rs b/core/src/tools/builtin/generate_object.rs
index 69ce406..cdbdeaf 100644
--- a/core/src/tools/builtin/generate_object.rs
+++ b/core/src/tools/builtin/generate_object.rs
@@ -145,16 +145,10 @@ impl Tool for GenerateObjectTool {
             _ => StructuredMode::Auto,
         };
 
-        // Resolve mode: Auto → Tool (safest cross-provider default).
-        // Strict/Json modes require provider-level response_format support which
-        // is not yet wired — fall back to Tool mode to avoid silent degradation.
-        let resolved_mode = match mode {
-            StructuredMode::Auto | StructuredMode::Strict | StructuredMode::Json => {
-                StructuredMode::Tool
-            }
-            other => other,
-        };
-
+        // Mode resolution is delegated to the structured engine, which inspects
+        // the client's native capability: Auto/Tool become forced tool calls,
+        // and Strict/Json use native `response_format` when the provider
+        // supports it (falling back to forced Tool mode otherwise).
         let max_repair_attempts = args
             .get("max_repair_attempts")
             .and_then(|v| v.as_u64())
@@ -167,7 +161,7 @@ impl Tool for GenerateObjectTool {
             schema,
             schema_name,
             schema_description,
-            mode: resolved_mode,
+            mode,
             max_repair_attempts,
         };
 
diff --git a/core/tests/test_structured_json_real_llm.rs b/core/tests/test_structured_json_real_llm.rs
new file mode 100644
index 0000000..072ef10
--- /dev/null
+++ b/core/tests/test_structured_json_real_llm.rs
@@ -0,0 +1,262 @@
+//! Real-LLM validation of the JSON-object-generation stability fix.
+//!
+//! Exercises the changed code paths against the live provider configured in
+//! `.a3s/config.acl`:
+//!   * forced-`tool_choice` structured generation (Tool mode) — the core fix,
+//!     run repeatedly to prove stability;
+//!   * native `response_format` (Json / Strict modes) on the OpenAI-compatible
+//!     provider;
+//!   * the hardened planner pre-analysis JSON path (`LlmPlanner::pre_analyze`).
+//!
+//! `#[ignore]` — requires a live provider in `.a3s/config.acl` and network
+//! access to it. Run:
+//!
+//! ```bash
+//! A3S_CONFIG_FILE=/abs/path/.a3s/config.acl \
+//!   cargo test -p a3s-code-core --test test_structured_json_real_llm -- --ignored --nocapture
+//! ```
+
+use std::path::PathBuf;
+use std::sync::Arc;
+use std::time::Duration;
+
+use a3s_code_core::config::CodeConfig;
+use a3s_code_core::llm::structured::{
+    generate_blocking, generate_streaming, PartialObjectCallback, StructuredMode,
+    StructuredRequest, StructuredResult,
+};
+use a3s_code_core::llm::{create_client_with_config, LlmClient};
+use a3s_code_core::planning::LlmPlanner;
+use serde_json::{json, Value};
+
+/// Hard ceiling per LLM call so a flaky/hung endpoint fails the test fast
+/// instead of stalling for minutes.
+const CALL_TIMEOUT: Duration = Duration::from_secs(90);
+
+fn repo_config_path() -> PathBuf {
+    std::env::var_os("A3S_CONFIG_FILE")
+        .map(PathBuf::from)
+        .unwrap_or_else(|| {
+            PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+                .join("../../..")
+                .join(".a3s/config.acl")
+        })
+}
+
+/// Build a client from `.a3s/config.acl`. By default uses the config's
+/// `default_model`; set `A3S_TEST_MODEL=provider/model` to target a specific
+/// model (e.g. a tool-capable one for structured-output tests).
+fn real_client() -> Arc<dyn LlmClient> {
+    let path = repo_config_path();
+    let config = CodeConfig::from_file(&path)
+        .unwrap_or_else(|e| panic!("failed to load {}: {e}", path.display()));
+
+    let llm_config = match std::env::var("A3S_TEST_MODEL") {
+        Ok(spec) => {
+            let (provider, model) = spec
+                .split_once('/')
+                .expect("A3S_TEST_MODEL must be 'provider/model'");
+            eprintln!("[real-llm] model = {spec} (from {})", path.display());
+            config
+                .llm_config(provider, model)
+                .unwrap_or_else(|| panic!("model {spec} not found in {}", path.display()))
+        }
+        Err(_) => {
+            eprintln!("[real-llm] model = <default> (from {})", path.display());
+            config
+                .default_llm_config()
+                .expect("default llm config in .a3s/config.acl")
+        }
+    };
+    create_client_with_config(llm_config)
+}
+
+/// Run a structured generation with a hard timeout.
+async fn gen_with_timeout(
+    client: &dyn LlmClient,
+    req: &StructuredRequest,
+) -> anyhow::Result<StructuredResult> {
+    match tokio::time::timeout(CALL_TIMEOUT, generate_blocking(client, req)).await {
+        Ok(res) => res,
+        Err(_) => anyhow::bail!("LLM call exceeded {CALL_TIMEOUT:?}"),
+    }
+}
+
+/// A non-trivial nested schema — the kind of object whose generation users
+/// reported as unstable.
+fn person_schema() -> Value {
+    json!({
+        "type": "object",
+        "required": ["name", "age", "skills"],
+        "additionalProperties": false,
+        "properties": {
+            "name": { "type": "string" },
+            "age": { "type": "integer" },
+            "skills": { "type": "array", "items": { "type": "string" } },
+            "address": {
+                "type": "object",
+                "properties": { "city": { "type": "string" } }
+            }
+        }
+    })
+}
+
+fn person_request(mode: StructuredMode) -> StructuredRequest {
+    StructuredRequest {
+        prompt: "Extract a structured person profile from this text: \
+                 'Alice is 30 years old, a Rust and Python developer living in Berlin.'"
+            .to_string(),
+        system: None,
+        schema: person_schema(),
+        schema_name: "person".to_string(),
+        schema_description: Some("A person profile".to_string()),
+        mode,
+        max_repair_attempts: 2,
+    }
+}
+
+fn assert_valid_person(object: &Value) {
+    assert!(
+        object["name"].is_string(),
+        "name must be a string, got {object}"
+    );
+    assert!(
+        object["age"].is_i64() || object["age"].is_u64(),
+        "age must be an integer, got {object}"
+    );
+    assert!(
+        object["skills"].is_array(),
+        "skills must be an array, got {object}"
+    );
+}
+
+/// The core fix: forced `tool_choice` structured generation must be STABLE.
+/// Run several independent times and require every one to yield a valid,
+/// schema-conforming object.
+#[tokio::test(flavor = "multi_thread")]
+#[ignore = "requires real provider credentials and network access"]
+async fn real_structured_tool_mode_is_stable() {
+    let client = real_client();
+    const RUNS: usize = 5;
+    let mut total_repairs = 0u32;
+
+    for i in 0..RUNS {
+        let result = gen_with_timeout(client.as_ref(), &person_request(StructuredMode::Tool))
+            .await
+            .unwrap_or_else(|e| panic!("run {i}: forced-tool structured generation failed: {e}"));
+
+        assert_eq!(
+            result.mode_used,
+            StructuredMode::Tool,
+            "run {i}: expected forced Tool mode"
+        );
+        assert_valid_person(&result.object);
+        total_repairs += result.repair_rounds as u32;
+        eprintln!(
+            "[tool] run {i}: ok (repairs={}) -> {}",
+            result.repair_rounds, result.object
+        );
+    }
+
+    eprintln!(
+        "[tool] {RUNS}/{RUNS} runs produced valid objects; total repair rounds = {total_repairs}"
+    );
+}
+
+/// The streaming structured path must also force `tool_choice` and yield a
+/// valid object (the streaming counterpart of the core fix).
+#[tokio::test(flavor = "multi_thread")]
+#[ignore = "requires real provider credentials and network access"]
+async fn real_structured_tool_mode_streaming() {
+    let client = real_client();
+    let partials = std::sync::Arc::new(std::sync::Mutex::new(0usize));
+    let partials_cb = partials.clone();
+    let on_partial: PartialObjectCallback = Box::new(move |_p| {
+        *partials_cb.lock().unwrap() += 1;
+    });
+
+    let result = tokio::time::timeout(
+        CALL_TIMEOUT,
+        generate_streaming(
+            client.as_ref(),
+            &person_request(StructuredMode::Tool),
+            on_partial,
+        ),
+    )
+    .await
+    .expect("streaming call timed out")
+    .expect("streaming tool-mode generation failed");
+
+    assert_eq!(result.mode_used, StructuredMode::Tool);
+    assert_valid_person(&result.object);
+    eprintln!(
+        "[tool-stream] partials={} -> {}",
+        *partials.lock().unwrap(),
+        result.object
+    );
+}
+
+/// Native `response_format: json_object` on the OpenAI-compatible provider.
+#[tokio::test(flavor = "multi_thread")]
+#[ignore = "requires real provider credentials and network access"]
+async fn real_structured_json_mode() {
+    let client = real_client();
+    let result = gen_with_timeout(client.as_ref(), &person_request(StructuredMode::Json))
+        .await
+        .expect("json_object structured generation failed");
+    assert_valid_person(&result.object);
+    eprintln!(
+        "[json] mode_used={:?} repairs={} -> {}",
+        result.mode_used, result.repair_rounds, result.object
+    );
+}
+
+/// Native `response_format: json_schema` (strict). Some providers reject schemas
+/// that don't meet their strict subset, so this is tolerant: it must either
+/// succeed with a valid object or fail cleanly (never hang or mis-parse).
+#[tokio::test(flavor = "multi_thread")]
+#[ignore = "requires real provider credentials and network access"]
+async fn real_structured_strict_mode() {
+    let client = real_client();
+    match gen_with_timeout(client.as_ref(), &person_request(StructuredMode::Strict)).await {
+        Ok(result) => {
+            assert_valid_person(&result.object);
+            eprintln!(
+                "[strict] ok mode_used={:?} repairs={} -> {}",
+                result.mode_used, result.repair_rounds, result.object
+            );
+        }
+        Err(e) => {
+            // Acceptable: provider may not support strict json_schema for this
+            // schema. The point is that it fails cleanly, not silently wrong.
+            eprintln!("[strict] provider rejected native json_schema (acceptable): {e}");
+        }
+    }
+}
+
+/// The hardened planner pre-analysis path against a real model: the response
+/// must parse into a `PreAnalysis` (robust extractor + one repair retry).
+#[tokio::test(flavor = "multi_thread")]
+#[ignore = "requires real provider credentials and network access"]
+async fn real_pre_analyze_parses() {
+    let client = real_client();
+    let analysis = tokio::time::timeout(
+        CALL_TIMEOUT,
+        LlmPlanner::pre_analyze(
+            &client,
+            "Refactor the auth module in src/auth.rs to use async/await, and keep the public API stable.",
+        ),
+    )
+    .await
+    .expect("pre_analyze timed out")
+    .expect("pre_analyze should parse a real model's JSON response");
+
+    assert!(
+        !analysis.optimized_input.trim().is_empty(),
+        "optimized_input should be populated"
+    );
+    eprintln!(
+        "[pre_analyze] requires_planning={} optimized_input={:?}",
+        analysis.requires_planning, analysis.optimized_input
+    );
+}