diff --git a/core/src/llm/anthropic.rs b/core/src/llm/anthropic.rs index 700d269..b1ba650 100644 --- a/core/src/llm/anthropic.rs +++ b/core/src/llm/anthropic.rs @@ -1,6 +1,7 @@ //! Anthropic Claude LLM client use super::http::{default_http_client, normalize_base_url, HttpClient}; +use super::structured; use super::types::*; use super::LlmClient; use crate::retry::{AttemptOutcome, RetryConfig}; @@ -152,17 +153,24 @@ impl AnthropicClient { } } -#[async_trait] -impl LlmClient for AnthropicClient { - async fn complete( - &self, - messages: &[Message], - system: Option<&str>, - tools: &[ToolDefinition], - ) -> Result { +impl AnthropicClient { + /// Apply a structured-output directive to an Anthropic request. + /// + /// Anthropic supports forced tool choice (`tool_choice`) but has no + /// `response_format`, so only `force_tool` is honored. + fn apply_directive( + request: &mut serde_json::Value, + directive: &structured::StructuredDirective, + ) { + if let Some(tool) = &directive.force_tool { + request["tool_choice"] = serde_json::json!({ "type": "tool", "name": tool }); + } + } + + /// Execute a fully-built (non-streaming) request body. + async fn send_request(&self, request_body: serde_json::Value) -> Result { { let request_started_at = Instant::now(); - let request_body = self.build_request(messages, system, tools); let url = format!("{}/v1/messages", self.base_url); let headers = vec![ @@ -259,6 +267,35 @@ impl LlmClient for AnthropicClient { Ok(llm_response) } } +} + +#[async_trait] +impl LlmClient for AnthropicClient { + async fn complete( + &self, + messages: &[Message], + system: Option<&str>, + tools: &[ToolDefinition], + ) -> Result { + self.send_request(self.build_request(messages, system, tools)) + .await + } + + async fn complete_structured( + &self, + messages: &[Message], + system: Option<&str>, + tools: &[ToolDefinition], + directive: &structured::StructuredDirective, + ) -> Result { + let mut request_body = self.build_request(messages, system, tools); + Self::apply_directive(&mut request_body, directive); + self.send_request(request_body).await + } + + fn native_structured_support(&self) -> structured::NativeStructuredSupport { + structured::NativeStructuredSupport::ForcedTool + } async fn complete_streaming( &self, @@ -266,10 +303,34 @@ impl LlmClient for AnthropicClient { system: Option<&str>, tools: &[ToolDefinition], cancel_token: CancellationToken, + ) -> Result> { + self.send_streaming(self.build_request(messages, system, tools), cancel_token) + .await + } + + async fn complete_streaming_structured( + &self, + messages: &[Message], + system: Option<&str>, + tools: &[ToolDefinition], + directive: &structured::StructuredDirective, + cancel_token: CancellationToken, + ) -> Result> { + let mut request_body = self.build_request(messages, system, tools); + Self::apply_directive(&mut request_body, directive); + self.send_streaming(request_body, cancel_token).await + } +} + +impl AnthropicClient { + /// Execute a fully-built streaming request body (sets `stream: true`). + async fn send_streaming( + &self, + mut request_body: serde_json::Value, + cancel_token: CancellationToken, ) -> Result> { { let request_started_at = Instant::now(); - let mut request_body = self.build_request(messages, system, tools); request_body["stream"] = serde_json::json!(true); let url = format!("{}/v1/messages", self.base_url); @@ -739,4 +800,40 @@ mod tests { assert_eq!(req["max_tokens"], 16_000); assert_eq!(req["thinking"]["budget_tokens"], 8_000); } + + #[test] + fn test_apply_directive_forces_tool_choice() { + let mut req = serde_json::json!({ "model": "m", "messages": [] }); + let directive = structured::StructuredDirective { + force_tool: Some("emit_person".to_string()), + response_format: None, + }; + AnthropicClient::apply_directive(&mut req, &directive); + assert_eq!(req["tool_choice"]["type"], "tool"); + assert_eq!(req["tool_choice"]["name"], "emit_person"); + } + + #[test] + fn test_apply_directive_ignores_response_format() { + // Anthropic has no response_format; both a response_format-only and an + // empty directive must be no-ops. + let mut req = serde_json::json!({ "model": "m" }); + AnthropicClient::apply_directive( + &mut req, + &structured::StructuredDirective { + force_tool: None, + response_format: Some(structured::ResponseFormat::JsonObject), + }, + ); + assert!(req.get("response_format").is_none()); + assert!(req.get("tool_choice").is_none()); + } + + #[test] + fn test_native_structured_support_is_forced_tool() { + assert_eq!( + make_client().native_structured_support(), + structured::NativeStructuredSupport::ForcedTool + ); + } } diff --git a/core/src/llm/mod.rs b/core/src/llm/mod.rs index 27b3443..9b8d460 100644 --- a/core/src/llm/mod.rs +++ b/core/src/llm/mod.rs @@ -48,6 +48,44 @@ pub trait LlmClient: Send + Sync { tools: &[ToolDefinition], cancel_token: CancellationToken, ) -> Result>; + + /// Report the strongest provider-native structured-output enforcement this + /// client supports. Used by [`structured`] to decide whether to force a + /// tool call, request a native `response_format`, or fall back to + /// prompt-and-parse. Defaults to no native support. + fn native_structured_support(&self) -> structured::NativeStructuredSupport { + structured::NativeStructuredSupport::None + } + + /// Complete a conversation while honoring a structured-output directive + /// (forced `tool_choice` and/or native `response_format`). + /// + /// The default implementation ignores the directive and behaves exactly + /// like [`LlmClient::complete`], so existing clients keep working unchanged; + /// providers that support native structured output override this. + async fn complete_structured( + &self, + messages: &[Message], + system: Option<&str>, + tools: &[ToolDefinition], + _directive: &structured::StructuredDirective, + ) -> Result { + self.complete(messages, system, tools).await + } + + /// Streaming counterpart of [`LlmClient::complete_structured`]. Defaults to + /// [`LlmClient::complete_streaming`], ignoring the directive. + async fn complete_streaming_structured( + &self, + messages: &[Message], + system: Option<&str>, + tools: &[ToolDefinition], + _directive: &structured::StructuredDirective, + cancel_token: CancellationToken, + ) -> Result> { + self.complete_streaming(messages, system, tools, cancel_token) + .await + } } // Include test modules — these reference internal types via crate paths diff --git a/core/src/llm/openai.rs b/core/src/llm/openai.rs index 67acfa2..85d9f57 100644 --- a/core/src/llm/openai.rs +++ b/core/src/llm/openai.rs @@ -1,6 +1,7 @@ //! OpenAI-compatible LLM client use super::http::{default_http_client, normalize_base_url, HttpClient}; +use super::structured; use super::types::*; use super::LlmClient; use crate::llm::types::{ToolResultContent, ToolResultContentField}; @@ -282,43 +283,80 @@ impl OpenAiClient { } } -#[async_trait] -impl LlmClient for OpenAiClient { - async fn complete( +impl OpenAiClient { + /// Apply a structured-output directive to an OpenAI-compatible request. + /// + /// OpenAI-compatible APIs support both forced function `tool_choice` and + /// native `response_format` (`json_object` / `json_schema` + `strict`). + fn apply_directive( + request: &mut serde_json::Value, + directive: &structured::StructuredDirective, + ) { + if let Some(tool) = &directive.force_tool { + request["tool_choice"] = serde_json::json!({ + "type": "function", + "function": { "name": tool } + }); + } + if let Some(rf) = &directive.response_format { + request["response_format"] = match rf { + structured::ResponseFormat::JsonObject => { + serde_json::json!({ "type": "json_object" }) + } + structured::ResponseFormat::JsonSchema { name, schema } => serde_json::json!({ + "type": "json_schema", + "json_schema": { "name": name, "schema": schema, "strict": true } + }), + }; + } + } + + /// Build a chat-completions request body, optionally applying a directive. + fn build_chat_request( &self, messages: &[Message], system: Option<&str>, tools: &[ToolDefinition], - ) -> Result { - { - let request_started_at = Instant::now(); - let mut openai_messages = Vec::new(); + directive: Option<&structured::StructuredDirective>, + ) -> serde_json::Value { + let mut openai_messages = Vec::new(); + + if let Some(sys) = system { + openai_messages.push(serde_json::json!({ + "role": "system", + "content": sys, + })); + } - if let Some(sys) = system { - openai_messages.push(serde_json::json!({ - "role": "system", - "content": sys, - })); - } + openai_messages.extend(self.convert_messages(messages)); - openai_messages.extend(self.convert_messages(messages)); + let mut request = serde_json::json!({ + "model": self.model, + "messages": openai_messages, + }); - let mut request = serde_json::json!({ - "model": self.model, - "messages": openai_messages, - }); + if let Some(temp) = self.temperature { + request["temperature"] = serde_json::json!(temp); + } + if let Some(max) = self.max_tokens { + request["max_tokens"] = serde_json::json!(max); + } - if let Some(temp) = self.temperature { - request["temperature"] = serde_json::json!(temp); - } - if let Some(max) = self.max_tokens { - request["max_tokens"] = serde_json::json!(max); - } + if !tools.is_empty() { + request["tools"] = serde_json::json!(self.convert_tools(tools)); + } - if !tools.is_empty() { - request["tools"] = serde_json::json!(self.convert_tools(tools)); - } + if let Some(directive) = directive { + Self::apply_directive(&mut request, directive); + } + request + } + + /// Execute a fully-built (non-streaming) chat-completions request. + async fn send_request(&self, request: serde_json::Value) -> Result { + { + let request_started_at = Instant::now(); let url = format!("{}{}", self.base_url, self.chat_completions_path); let request_headers = self.request_headers(); @@ -442,45 +480,76 @@ impl LlmClient for OpenAiClient { Ok(llm_response) } } +} - async fn complete_streaming( +#[async_trait] +impl LlmClient for OpenAiClient { + async fn complete( &self, messages: &[Message], system: Option<&str>, tools: &[ToolDefinition], - cancel_token: tokio_util::sync::CancellationToken, - ) -> Result> { - { - let request_started_at = Instant::now(); - let mut openai_messages = Vec::new(); - - if let Some(sys) = system { - openai_messages.push(serde_json::json!({ - "role": "system", - "content": sys, - })); - } + ) -> Result { + self.send_request(self.build_chat_request(messages, system, tools, None)) + .await + } - openai_messages.extend(self.convert_messages(messages)); + async fn complete_structured( + &self, + messages: &[Message], + system: Option<&str>, + tools: &[ToolDefinition], + directive: &structured::StructuredDirective, + ) -> Result { + self.send_request(self.build_chat_request(messages, system, tools, Some(directive))) + .await + } - let mut request = serde_json::json!({ - "model": self.model, - "messages": openai_messages, - "stream": true, - "stream_options": { "include_usage": true }, - }); + fn native_structured_support(&self) -> structured::NativeStructuredSupport { + structured::NativeStructuredSupport::JsonSchema + } - if let Some(temp) = self.temperature { - request["temperature"] = serde_json::json!(temp); - } - if let Some(max) = self.max_tokens { - request["max_tokens"] = serde_json::json!(max); - } + async fn complete_streaming( + &self, + messages: &[Message], + system: Option<&str>, + tools: &[ToolDefinition], + cancel_token: tokio_util::sync::CancellationToken, + ) -> Result> { + self.send_streaming( + self.build_chat_request(messages, system, tools, None), + cancel_token, + ) + .await + } - if !tools.is_empty() { - request["tools"] = serde_json::json!(self.convert_tools(tools)); - } + async fn complete_streaming_structured( + &self, + messages: &[Message], + system: Option<&str>, + tools: &[ToolDefinition], + directive: &structured::StructuredDirective, + cancel_token: tokio_util::sync::CancellationToken, + ) -> Result> { + self.send_streaming( + self.build_chat_request(messages, system, tools, Some(directive)), + cancel_token, + ) + .await + } +} +impl OpenAiClient { + /// Execute a fully-built streaming chat-completions request (sets `stream`). + async fn send_streaming( + &self, + mut request: serde_json::Value, + cancel_token: tokio_util::sync::CancellationToken, + ) -> Result> { + { + request["stream"] = serde_json::json!(true); + request["stream_options"] = serde_json::json!({ "include_usage": true }); + let request_started_at = Instant::now(); let url = format!("{}{}", self.base_url, self.chat_completions_path); let request_headers = self.request_headers(); @@ -1107,3 +1176,99 @@ pub(crate) struct OpenAiFunctionDelta { pub(crate) name: Option, pub(crate) arguments: Option, } + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use crate::llm::types::{Message, ToolDefinition}; + + fn make_client() -> OpenAiClient { + OpenAiClient::new("test-key".to_string(), "gpt-test".to_string()) + } + + #[test] + fn test_apply_directive_forced_function_tool_choice() { + let mut req = serde_json::json!({ "model": "m" }); + OpenAiClient::apply_directive( + &mut req, + &structured::StructuredDirective { + force_tool: Some("emit_person".to_string()), + response_format: None, + }, + ); + assert_eq!(req["tool_choice"]["type"], "function"); + assert_eq!(req["tool_choice"]["function"]["name"], "emit_person"); + assert!(req.get("response_format").is_none()); + } + + #[test] + fn test_apply_directive_json_schema_strict() { + let mut req = serde_json::json!({}); + OpenAiClient::apply_directive( + &mut req, + &structured::StructuredDirective { + force_tool: None, + response_format: Some(structured::ResponseFormat::JsonSchema { + name: "person".to_string(), + schema: serde_json::json!({ "type": "object" }), + }), + }, + ); + assert_eq!(req["response_format"]["type"], "json_schema"); + assert_eq!(req["response_format"]["json_schema"]["name"], "person"); + assert_eq!(req["response_format"]["json_schema"]["strict"], true); + assert!(req.get("tool_choice").is_none()); + } + + #[test] + fn test_apply_directive_json_object() { + let mut req = serde_json::json!({}); + OpenAiClient::apply_directive( + &mut req, + &structured::StructuredDirective { + force_tool: None, + response_format: Some(structured::ResponseFormat::JsonObject), + }, + ); + assert_eq!(req["response_format"]["type"], "json_object"); + } + + #[test] + fn test_build_chat_request_applies_directive_and_system() { + let req = make_client().build_chat_request( + &[Message::user("hi")], + Some("sys"), + &[ToolDefinition { + name: "emit_x".to_string(), + description: "emit".to_string(), + parameters: serde_json::json!({ "type": "object" }), + }], + Some(&structured::StructuredDirective { + force_tool: Some("emit_x".to_string()), + response_format: None, + }), + ); + assert_eq!(req["messages"][0]["role"], "system"); + assert_eq!(req["tool_choice"]["function"]["name"], "emit_x"); + assert_eq!(req["tools"][0]["function"]["name"], "emit_x"); + } + + #[test] + fn test_build_chat_request_without_directive_is_plain() { + let req = make_client().build_chat_request(&[Message::user("hi")], None, &[], None); + assert!(req.get("tool_choice").is_none()); + assert!(req.get("response_format").is_none()); + } + + #[test] + fn test_native_structured_support_is_json_schema() { + assert_eq!( + make_client().native_structured_support(), + structured::NativeStructuredSupport::JsonSchema + ); + } +} diff --git a/core/src/llm/structured.rs b/core/src/llm/structured.rs index 3db0378..8043947 100644 --- a/core/src/llm/structured.rs +++ b/core/src/llm/structured.rs @@ -54,6 +54,50 @@ pub struct StructuredResult { pub mode_used: StructuredMode, } +/// Provider-native structured-output capability. +/// +/// Each [`LlmClient`] reports this so the structured engine can request the +/// strongest enforcement the provider actually supports. Defaults to +/// [`NativeStructuredSupport::None`] for clients that don't override it. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum NativeStructuredSupport { + /// No native enforcement — rely on prompt instructions + lenient extraction. + None, + /// Can force a specific tool call (Anthropic `tool_choice`, OpenAI function + /// `tool_choice`). Guarantees the model emits the structured tool call + /// instead of free-form prose. + ForcedTool, + /// Supports OpenAI-style `response_format` (`json_object` and + /// `json_schema` + `strict`) in addition to forced tool calls. + JsonSchema, +} + +/// A native `response_format` request for OpenAI-compatible providers. +#[derive(Debug, Clone, PartialEq)] +pub enum ResponseFormat { + /// `{"type":"json_object"}` — guarantees syntactically valid JSON, but not + /// schema conformance. + JsonObject, + /// `{"type":"json_schema","json_schema":{name,schema,strict:true}}` — + /// parser-enforced schema conformance. + JsonSchema { name: String, schema: Value }, +} + +/// Instruction telling a provider how to enforce structured output for a call. +/// +/// Carries the union of intents; each provider honors what it supports and +/// ignores the rest (e.g. Anthropic has no `response_format`, so it only acts +/// on `force_tool`). The default (`force_tool: None, response_format: None`) +/// reproduces an ordinary completion, which is why the trait's default +/// `complete_structured` impl is behavior-preserving. +#[derive(Debug, Clone, Default, PartialEq)] +pub struct StructuredDirective { + /// Force the model to call exactly this tool (provider `tool_choice`). + pub force_tool: Option, + /// Request a provider-native `response_format` (OpenAI-compatible only). + pub response_format: Option, +} + /// Callback for streaming partial object snapshots. pub type PartialObjectCallback = Box; @@ -69,17 +113,18 @@ pub async fn generate_blocking( client: &dyn LlmClient, req: &StructuredRequest, ) -> Result { - let mode = req.mode; + let mode = resolve_mode(req.mode, client.native_structured_support()); let mut messages = build_initial_messages(req, mode); let system = build_system_prompt(req, mode); let tools = build_tools(req, mode); + let directive = build_directive(req, mode); let mut total_usage = TokenUsage::default(); let mut repair_rounds: u8 = 0; loop { let resp = client - .complete(&messages, Some(&system), &tools) + .complete_structured(&messages, Some(&system), &tools, &directive) .await .context("LLM call failed during structured generation")?; @@ -153,14 +198,15 @@ pub async fn generate_streaming( req: &StructuredRequest, on_partial: PartialObjectCallback, ) -> Result { - let mode = req.mode; + let mode = resolve_mode(req.mode, client.native_structured_support()); let messages = build_initial_messages(req, mode); let system = build_system_prompt(req, mode); let tools = build_tools(req, mode); + let directive = build_directive(req, mode); let cancel_token = CancellationToken::new(); let mut rx = client - .complete_streaming(&messages, Some(&system), &tools, cancel_token) + .complete_streaming_structured(&messages, Some(&system), &tools, &directive, cancel_token) .await .context("LLM streaming call failed during structured generation")?; @@ -802,6 +848,50 @@ fn value_type_name(value: &Value) -> &'static str { // Message/prompt construction helpers // --------------------------------------------------------------------------- +/// Resolve the requested mode against the provider's native capability. +/// +/// `Auto`/`Tool` always resolve to forced `Tool` mode — the most reliable +/// cross-provider strategy (the synthetic `emit_*` tool is made mandatory via +/// the provider's `tool_choice`). `Strict`/`Json` use native `response_format` +/// only when the provider reports [`NativeStructuredSupport::JsonSchema`]; +/// otherwise they fall back to forced `Tool` mode rather than silently +/// degrading to unconstrained text. +fn resolve_mode(requested: StructuredMode, support: NativeStructuredSupport) -> StructuredMode { + match requested { + StructuredMode::Prompt => StructuredMode::Prompt, + StructuredMode::Strict if support == NativeStructuredSupport::JsonSchema => { + StructuredMode::Strict + } + StructuredMode::Json if support == NativeStructuredSupport::JsonSchema => { + StructuredMode::Json + } + // Auto, Tool, or Strict/Json on a provider without json_schema support. + _ => StructuredMode::Tool, + } +} + +/// Build the provider directive for an already-resolved mode. +fn build_directive(req: &StructuredRequest, mode: StructuredMode) -> StructuredDirective { + match mode { + StructuredMode::Tool => StructuredDirective { + force_tool: Some(format!("emit_{}", req.schema_name)), + response_format: None, + }, + StructuredMode::Strict => StructuredDirective { + force_tool: None, + response_format: Some(ResponseFormat::JsonSchema { + name: req.schema_name.clone(), + schema: req.schema.clone(), + }), + }, + StructuredMode::Json => StructuredDirective { + force_tool: None, + response_format: Some(ResponseFormat::JsonObject), + }, + StructuredMode::Auto | StructuredMode::Prompt => StructuredDirective::default(), + } +} + fn build_initial_messages(req: &StructuredRequest, mode: StructuredMode) -> Vec { match mode { StructuredMode::Tool => { @@ -809,8 +899,10 @@ fn build_initial_messages(req: &StructuredRequest, mode: StructuredMode) -> Vec< // with a tool call whose input is the structured object. vec![Message::user(&req.prompt)] } - StructuredMode::Prompt => { - // Append schema instructions to the user prompt + StructuredMode::Prompt | StructuredMode::Json => { + // Prompt mode and json_object mode both need the schema in the prompt: + // json_object only guarantees *syntactic* validity, so the model still + // has to be told the shape it should produce. let augmented = format!( "{}\n\nYou MUST respond with ONLY a valid JSON object (no markdown, no explanation) that conforms to this JSON Schema:\n\n```json\n{}\n```", req.prompt, @@ -819,8 +911,8 @@ fn build_initial_messages(req: &StructuredRequest, mode: StructuredMode) -> Vec< vec![Message::user(&augmented)] } _ => { - // Strict/Json modes: the schema constraint is enforced by the provider, - // so the user message is just the prompt. + // Strict mode: the schema constraint is enforced by the provider via + // response_format.json_schema, so the user message is just the prompt. vec![Message::user(&req.prompt)] } } @@ -838,7 +930,7 @@ fn build_system_prompt(req: &StructuredRequest, mode: StructuredMode) -> String req.schema_name ) } - StructuredMode::Prompt => { + StructuredMode::Prompt | StructuredMode::Json => { format!( "{}{}You are a structured data extraction assistant. Always respond with valid JSON only, no markdown fences, no explanation text.", base, diff --git a/core/src/llm/structured_tests.rs b/core/src/llm/structured_tests.rs index 3ba3cc0..d2d5bdf 100644 --- a/core/src/llm/structured_tests.rs +++ b/core/src/llm/structured_tests.rs @@ -1322,3 +1322,336 @@ async fn test_integration_generate_complex_nested_schema() { error["code"], result.repair_rounds ); } + +// ======================================================================== +// Native structured-output routing (capability + directive) tests +// +// These verify the core stability fix: the structured engine resolves the +// mode against the client's native capability and passes the correct +// directive (forced tool_choice vs. response_format) to complete_structured. +// ======================================================================== + +/// A client that records the directive + tool surface it was asked to honor, +/// and reports a configurable native capability. +struct RecordingClient { + support: NativeStructuredSupport, + responses: Mutex>, + last_directive: Mutex>, + last_tool_names: Mutex>, + structured_calls: std::sync::atomic::AtomicUsize, +} + +impl RecordingClient { + fn new(support: NativeStructuredSupport, responses: Vec) -> Self { + Self { + support, + responses: Mutex::new(responses), + last_directive: Mutex::new(None), + last_tool_names: Mutex::new(Vec::new()), + structured_calls: std::sync::atomic::AtomicUsize::new(0), + } + } + + fn pop(&self) -> anyhow::Result { + let mut responses = self.responses.lock().unwrap(); + if responses.is_empty() { + anyhow::bail!("No more mock responses"); + } + Ok(responses.remove(0)) + } +} + +#[async_trait] +impl LlmClient for RecordingClient { + async fn complete( + &self, + _messages: &[Message], + _system: Option<&str>, + _tools: &[ToolDefinition], + ) -> anyhow::Result { + self.pop() + } + + async fn complete_streaming( + &self, + _messages: &[Message], + _system: Option<&str>, + _tools: &[ToolDefinition], + _cancel_token: CancellationToken, + ) -> anyhow::Result> { + anyhow::bail!("streaming not used in routing tests") + } + + fn native_structured_support(&self) -> NativeStructuredSupport { + self.support + } + + async fn complete_structured( + &self, + _messages: &[Message], + _system: Option<&str>, + tools: &[ToolDefinition], + directive: &StructuredDirective, + ) -> anyhow::Result { + self.structured_calls + .fetch_add(1, std::sync::atomic::Ordering::SeqCst); + *self.last_directive.lock().unwrap() = Some(directive.clone()); + *self.last_tool_names.lock().unwrap() = tools.iter().map(|t| t.name.clone()).collect(); + self.pop() + } + + async fn complete_streaming_structured( + &self, + _messages: &[Message], + _system: Option<&str>, + tools: &[ToolDefinition], + directive: &StructuredDirective, + _cancel_token: CancellationToken, + ) -> anyhow::Result> { + self.structured_calls + .fetch_add(1, std::sync::atomic::Ordering::SeqCst); + *self.last_directive.lock().unwrap() = Some(directive.clone()); + *self.last_tool_names.lock().unwrap() = tools.iter().map(|t| t.name.clone()).collect(); + let response = self.pop()?; + let (tx, rx) = mpsc::channel(10); + tokio::spawn(async move { + for block in &response.message.content { + if let ContentBlock::ToolUse { name, input, .. } = block { + tx.send(StreamEvent::ToolUseStart { + id: "call_001".to_string(), + name: name.clone(), + }) + .await + .ok(); + let json_str = serde_json::to_string(input).unwrap(); + for chunk in json_str.as_bytes().chunks(8) { + tx.send(StreamEvent::ToolUseInputDelta( + String::from_utf8_lossy(chunk).to_string(), + )) + .await + .ok(); + } + } else if let ContentBlock::Text { text } = block { + for chunk in text.as_bytes().chunks(8) { + tx.send(StreamEvent::TextDelta( + String::from_utf8_lossy(chunk).to_string(), + )) + .await + .ok(); + } + } + } + tx.send(StreamEvent::Done(response)).await.ok(); + }); + Ok(rx) + } +} + +fn person_request(mode: StructuredMode) -> StructuredRequest { + StructuredRequest { + prompt: "Extract the person".to_string(), + system: None, + schema: serde_json::json!({ + "type": "object", + "required": ["name"], + "properties": { "name": { "type": "string" } } + }), + schema_name: "person".to_string(), + schema_description: None, + mode, + max_repair_attempts: 0, + } +} + +#[tokio::test] +async fn test_routing_tool_mode_forces_tool_choice() { + let client = RecordingClient::new( + NativeStructuredSupport::ForcedTool, + vec![MockStructuredClient::tool_call_response( + "emit_person", + serde_json::json!({ "name": "Bob" }), + )], + ); + let result = generate_blocking(&client, &person_request(StructuredMode::Tool)) + .await + .unwrap(); + + assert_eq!(result.object["name"], "Bob"); + assert_eq!(result.mode_used, StructuredMode::Tool); + assert_eq!( + client + .structured_calls + .load(std::sync::atomic::Ordering::SeqCst), + 1 + ); + + let directive = client.last_directive.lock().unwrap().clone().unwrap(); + assert_eq!(directive.force_tool.as_deref(), Some("emit_person")); + assert!(directive.response_format.is_none()); + // The synthetic emit tool must be present in the tool surface. + assert_eq!( + client.last_tool_names.lock().unwrap().as_slice(), + &["emit_person".to_string()] + ); +} + +#[tokio::test] +async fn test_routing_auto_collapses_to_forced_tool() { + let client = RecordingClient::new( + NativeStructuredSupport::JsonSchema, + vec![MockStructuredClient::tool_call_response( + "emit_person", + serde_json::json!({ "name": "Bob" }), + )], + ); + let result = generate_blocking(&client, &person_request(StructuredMode::Auto)) + .await + .unwrap(); + + assert_eq!(result.mode_used, StructuredMode::Tool); + let directive = client.last_directive.lock().unwrap().clone().unwrap(); + assert_eq!(directive.force_tool.as_deref(), Some("emit_person")); +} + +#[tokio::test] +async fn test_routing_strict_uses_response_format_when_supported() { + let client = RecordingClient::new( + NativeStructuredSupport::JsonSchema, + vec![MockStructuredClient::text_response(r#"{"name": "Bob"}"#)], + ); + let result = generate_blocking(&client, &person_request(StructuredMode::Strict)) + .await + .unwrap(); + + assert_eq!(result.object["name"], "Bob"); + assert_eq!(result.mode_used, StructuredMode::Strict); + + let directive = client.last_directive.lock().unwrap().clone().unwrap(); + assert!(directive.force_tool.is_none()); + match directive.response_format { + Some(ResponseFormat::JsonSchema { + ref name, + ref schema, + }) => { + assert_eq!(name, "person"); + assert_eq!(schema["required"][0], "name"); + } + other => panic!("expected json_schema response_format, got {:?}", other), + } + // Strict mode must not inject a tool. + assert!(client.last_tool_names.lock().unwrap().is_empty()); +} + +#[tokio::test] +async fn test_routing_strict_falls_back_to_tool_without_support() { + // A provider with no native json_schema support must NOT silently degrade + // to unconstrained text — it falls back to forced tool mode. + let client = RecordingClient::new( + NativeStructuredSupport::ForcedTool, + vec![MockStructuredClient::tool_call_response( + "emit_person", + serde_json::json!({ "name": "Bob" }), + )], + ); + let result = generate_blocking(&client, &person_request(StructuredMode::Strict)) + .await + .unwrap(); + + assert_eq!(result.mode_used, StructuredMode::Tool); + let directive = client.last_directive.lock().unwrap().clone().unwrap(); + assert_eq!(directive.force_tool.as_deref(), Some("emit_person")); + assert!(directive.response_format.is_none()); +} + +#[tokio::test] +async fn test_routing_json_uses_json_object_when_supported() { + let client = RecordingClient::new( + NativeStructuredSupport::JsonSchema, + vec![MockStructuredClient::text_response(r#"{"name": "Bob"}"#)], + ); + let result = generate_blocking(&client, &person_request(StructuredMode::Json)) + .await + .unwrap(); + + assert_eq!(result.mode_used, StructuredMode::Json); + let directive = client.last_directive.lock().unwrap().clone().unwrap(); + assert_eq!(directive.response_format, Some(ResponseFormat::JsonObject)); + assert!(directive.force_tool.is_none()); +} + +#[tokio::test] +async fn test_streaming_routing_tool_mode_forces_tool_choice() { + // The streaming path must also force the directive (via + // complete_streaming_structured), not silently drop it. + let client = RecordingClient::new( + NativeStructuredSupport::ForcedTool, + vec![MockStructuredClient::tool_call_response( + "emit_person", + serde_json::json!({ "name": "Bob" }), + )], + ); + + let partials = Arc::new(Mutex::new(0usize)); + let partials_cb = partials.clone(); + let result = generate_streaming( + &client, + &person_request(StructuredMode::Tool), + Box::new(move |_partial| { + *partials_cb.lock().unwrap() += 1; + }), + ) + .await + .unwrap(); + + assert_eq!(result.object["name"], "Bob"); + assert_eq!(result.mode_used, StructuredMode::Tool); + let directive = client.last_directive.lock().unwrap().clone().unwrap(); + assert_eq!(directive.force_tool.as_deref(), Some("emit_person")); + assert!( + *partials.lock().unwrap() >= 1, + "on_partial should fire at least once (final object)" + ); +} + +// ======================================================================== +// Adversarial JSON extraction edge cases +// ======================================================================== + +#[test] +fn test_extract_json_crlf_code_fence() { + let input = "```json\r\n{\"a\": 1}\r\n```"; + let result = extract_json_value(input).unwrap(); + assert_eq!(result["a"], 1); +} + +#[test] +fn test_extract_json_prose_with_brace_in_string() { + // A `}` inside a string value, plus trailing prose: balanced extraction + // must keep the in-string brace and stop at the real closing brace. + let input = "Sure thing: {\"msg\": \"close with } please\"} — done."; + let result = extract_json_value(input).unwrap(); + assert_eq!(result["msg"], "close with } please"); +} + +#[test] +fn test_extract_json_single_quotes_rejected() { + // Single-quoted pseudo-JSON is NOT valid JSON; extraction must fail rather + // than silently mis-parse. (Documents current, intentional behavior.) + assert!(extract_json_value("{'name': 'Bob'}").is_err()); +} + +#[test] +fn test_extract_raw_output_tool_mode_falls_back_to_text() { + // If the model ignores the forced tool and replies with text anyway, tool + // mode must fall back to reading the message text. + let msg = Message { + role: "assistant".to_string(), + content: vec![ContentBlock::Text { + text: r#"{"name": "Bob"}"#.to_string(), + }], + reasoning_content: None, + }; + let raw = extract_raw_output(&msg, StructuredMode::Tool); + let value = extract_json_value(&raw).unwrap(); + assert_eq!(value["name"], "Bob"); +} diff --git a/core/src/llm/zhipu.rs b/core/src/llm/zhipu.rs index 2ad1218..47f052a 100644 --- a/core/src/llm/zhipu.rs +++ b/core/src/llm/zhipu.rs @@ -4,6 +4,7 @@ //! This client wraps `OpenAiClient` with the correct GLM defaults. use super::openai::OpenAiClient; +use super::structured; use super::types::*; use super::LlmClient; use crate::retry::RetryConfig; @@ -79,4 +80,33 @@ impl LlmClient for ZhipuClient { .complete_streaming(messages, system, tools, cancel_token) .await } + + fn native_structured_support(&self) -> structured::NativeStructuredSupport { + self.0.native_structured_support() + } + + async fn complete_structured( + &self, + messages: &[Message], + system: Option<&str>, + tools: &[ToolDefinition], + directive: &structured::StructuredDirective, + ) -> Result { + self.0 + .complete_structured(messages, system, tools, directive) + .await + } + + async fn complete_streaming_structured( + &self, + messages: &[Message], + system: Option<&str>, + tools: &[ToolDefinition], + directive: &structured::StructuredDirective, + cancel_token: CancellationToken, + ) -> Result> { + self.0 + .complete_streaming_structured(messages, system, tools, directive, cancel_token) + .await + } } diff --git a/core/src/planning/llm_planner.rs b/core/src/planning/llm_planner.rs index 11f7f50..e1a4669 100644 --- a/core/src/planning/llm_planner.rs +++ b/core/src/planning/llm_planner.rs @@ -215,19 +215,41 @@ impl LlmPlanner { pub async fn pre_analyze(llm: &Arc, prompt: &str) -> Result { let system = crate::prompts::PRE_ANALYSIS_SYSTEM; - let messages = vec![Message::user(prompt)]; - let response = llm - .complete(&messages, Some(system), &[]) - .await - .context("LLM pre-analysis call failed")?; + // One initial attempt plus one repair round: if the model returns + // unparseable JSON, re-prompt it once to emit strictly valid JSON before + // giving up (callers fall back to heuristics on the returned error). + const MAX_ATTEMPTS: usize = 2; + let mut messages = vec![Message::user(prompt)]; + let mut last_err: Option = None; + + for attempt in 0..MAX_ATTEMPTS { + let response = llm + .complete(&messages, Some(system), &[]) + .await + .context("LLM pre-analysis call failed")?; + + let text = response.text(); + match Self::parse_pre_analysis_response(&text, prompt) { + Ok(analysis) => return Ok(analysis), + Err(e) => { + last_err = Some(e); + if attempt + 1 < MAX_ATTEMPTS { + messages.push(response.message.clone()); + messages.push(Message::user( + "Your previous response was not valid JSON matching the required \ + schema. Respond again with ONLY the JSON object — no markdown \ + fences, no prose, no explanation.", + )); + } + } + } + } - let text = response.text(); - Self::parse_pre_analysis_response(&text, prompt) + Err(last_err.unwrap_or_else(|| anyhow::anyhow!("pre-analysis produced no result"))) } fn parse_pre_analysis_response(text: &str, original_prompt: &str) -> Result { - let cleaned = Self::extract_json(text); - let parsed: PreAnalysisResponse = serde_json::from_str(cleaned) + let parsed: PreAnalysisResponse = Self::parse_json_lenient(text) .context("Failed to parse pre-analysis JSON from LLM response")?; let intent = match parsed.intent.to_lowercase().as_str() { @@ -286,9 +308,8 @@ impl LlmPlanner { // ======================================================================== fn parse_plan_response(text: &str) -> Result { - let cleaned = Self::extract_json(text); - let parsed: PlanResponse = - serde_json::from_str(cleaned).context("Failed to parse plan JSON from LLM response")?; + let parsed: PlanResponse = Self::parse_json_lenient(text) + .context("Failed to parse plan JSON from LLM response")?; let complexity = match parsed.complexity.as_str() { "Simple" => Complexity::Simple, @@ -322,16 +343,14 @@ impl LlmPlanner { } fn parse_goal_response(text: &str) -> Result { - let cleaned = Self::extract_json(text); - let parsed: GoalResponse = - serde_json::from_str(cleaned).context("Failed to parse goal JSON from LLM response")?; + let parsed: GoalResponse = Self::parse_json_lenient(text) + .context("Failed to parse goal JSON from LLM response")?; Ok(AgentGoal::new(parsed.description).with_criteria(parsed.success_criteria)) } fn parse_achievement_response(text: &str) -> Result { - let cleaned = Self::extract_json(text); - let parsed: AchievementResponse = serde_json::from_str(cleaned) + let parsed: AchievementResponse = Self::parse_json_lenient(text) .context("Failed to parse achievement JSON from LLM response")?; Ok(AchievementResult { @@ -341,20 +360,15 @@ impl LlmPlanner { }) } - /// Extract JSON from LLM text that may contain markdown fences - fn extract_json(text: &str) -> &str { - let trimmed = text.trim(); - - // Strip markdown code fences if present - if let Some(start) = trimmed.find('{') { - if let Some(end) = trimmed.rfind('}') { - if start <= end { - return &trimmed[start..=end]; - } - } - } - - trimmed + /// Parse JSON from possibly-dirty LLM output into the target type. + /// + /// Uses the shared robust extractor ([`crate::llm::structured::extract_json_value`]), + /// which handles ```json fences, surrounding prose, and braces embedded in + /// strings — unlike the previous naive first-`{`/last-`}` slice, which broke + /// on fenced output or any `}` inside a string value. + fn parse_json_lenient(text: &str) -> Result { + let value = crate::llm::structured::extract_json_value(text)?; + Ok(serde_json::from_value(value)?) } } @@ -521,19 +535,125 @@ mod tests { } #[test] - fn test_extract_json_plain() { - assert_eq!(LlmPlanner::extract_json(" {\"a\": 1} "), "{\"a\": 1}"); + fn test_parse_json_lenient_plain() { + let v: serde_json::Value = LlmPlanner::parse_json_lenient(" {\"a\": 1} ").unwrap(); + assert_eq!(v["a"], 1); } #[test] - fn test_extract_json_with_fences() { + fn test_parse_json_lenient_with_fences() { let text = "```json\n{\"a\": 1}\n```"; - assert_eq!(LlmPlanner::extract_json(text), "{\"a\": 1}"); + let v: serde_json::Value = LlmPlanner::parse_json_lenient(text).unwrap(); + assert_eq!(v["a"], 1); } #[test] - fn test_extract_json_with_surrounding_text() { + fn test_parse_json_lenient_with_surrounding_prose() { let text = "Here is the plan:\n{\"goal\": \"test\"}\nDone."; - assert_eq!(LlmPlanner::extract_json(text), "{\"goal\": \"test\"}"); + let v: serde_json::Value = LlmPlanner::parse_json_lenient(text).unwrap(); + assert_eq!(v["goal"], "test"); + } + + #[test] + fn test_parse_json_lenient_brace_inside_string_value() { + // The old naive first-`{`/last-`}` slice broke when a string value + // contained a `}` followed by trailing prose; the robust extractor + // balances braces while respecting string boundaries. + let text = "Result: {\"note\": \"use a closing brace } here\"} -- end."; + let v: serde_json::Value = LlmPlanner::parse_json_lenient(text).unwrap(); + assert_eq!(v["note"], "use a closing brace } here"); + } + + #[test] + fn test_parse_json_lenient_fenced_with_trailing_prose() { + // ```json fence followed by an explanation. The naive parser's + // `rfind('}')` could grab a brace from the trailing prose. + let text = "```json\n{\"goal\": \"ship\"}\n```\nNote: revisit the `plan` later."; + let v: serde_json::Value = LlmPlanner::parse_json_lenient(text).unwrap(); + assert_eq!(v["goal"], "ship"); + } + + #[test] + fn test_parse_json_lenient_rejects_non_json() { + let err = LlmPlanner::parse_json_lenient::("no json here at all"); + assert!(err.is_err()); + } + + /// Replays a fixed sequence of assistant text responses, one per call. + struct ReplayClient { + responses: std::sync::Mutex>, + } + + impl ReplayClient { + fn new(responses: Vec) -> Self { + Self { + responses: std::sync::Mutex::new(responses), + } + } + } + + #[async_trait::async_trait] + impl LlmClient for ReplayClient { + async fn complete( + &self, + _messages: &[Message], + _system: Option<&str>, + _tools: &[crate::llm::ToolDefinition], + ) -> anyhow::Result { + let text = { + let mut r = self.responses.lock().unwrap(); + if r.is_empty() { + String::new() + } else { + r.remove(0) + } + }; + Ok(crate::llm::LlmResponse { + message: Message { + role: "assistant".to_string(), + content: vec![crate::llm::ContentBlock::Text { text }], + reasoning_content: None, + }, + usage: crate::llm::TokenUsage::default(), + stop_reason: None, + meta: None, + }) + } + + async fn complete_streaming( + &self, + _messages: &[Message], + _system: Option<&str>, + _tools: &[crate::llm::ToolDefinition], + _cancel_token: tokio_util::sync::CancellationToken, + ) -> anyhow::Result> { + anyhow::bail!("streaming not used in planner tests") + } + } + + #[tokio::test] + async fn test_pre_analyze_repairs_invalid_json() { + // First response is unparseable; pre_analyze must re-prompt and succeed + // on the second (valid) response. + let good = r#"{"intent":"explore","requires_planning":false,"goal":{"description":"Do x","success_criteria":["done"]},"execution_plan":{"complexity":"Simple","steps":[],"required_tools":[]},"optimized_input":"Do x carefully"}"#; + let client: Arc = Arc::new(ReplayClient::new(vec![ + "Sorry — here's the plan, but not as JSON.".to_string(), + good.to_string(), + ])); + let pa = LlmPlanner::pre_analyze(&client, "do x").await.unwrap(); + assert_eq!(pa.optimized_input, "Do x carefully"); + } + + #[tokio::test] + async fn test_pre_analyze_first_try_with_fenced_json() { + // A single ```json-fenced response must parse on the first attempt + // (robust extractor), with no repair round needed. + let good = format!( + "```json\n{}\n```", + r#"{"intent":"plan","requires_planning":true,"goal":{"description":"g","success_criteria":[]},"execution_plan":{"complexity":"Medium","steps":[],"required_tools":[]},"optimized_input":"opt"}"# + ); + let client: Arc = Arc::new(ReplayClient::new(vec![good])); + let pa = LlmPlanner::pre_analyze(&client, "do x").await.unwrap(); + assert_eq!(pa.optimized_input, "opt"); } } diff --git a/core/src/tools/builtin/generate_object.rs b/core/src/tools/builtin/generate_object.rs index 69ce406..cdbdeaf 100644 --- a/core/src/tools/builtin/generate_object.rs +++ b/core/src/tools/builtin/generate_object.rs @@ -145,16 +145,10 @@ impl Tool for GenerateObjectTool { _ => StructuredMode::Auto, }; - // Resolve mode: Auto → Tool (safest cross-provider default). - // Strict/Json modes require provider-level response_format support which - // is not yet wired — fall back to Tool mode to avoid silent degradation. - let resolved_mode = match mode { - StructuredMode::Auto | StructuredMode::Strict | StructuredMode::Json => { - StructuredMode::Tool - } - other => other, - }; - + // Mode resolution is delegated to the structured engine, which inspects + // the client's native capability: Auto/Tool become forced tool calls, + // and Strict/Json use native `response_format` when the provider + // supports it (falling back to forced Tool mode otherwise). let max_repair_attempts = args .get("max_repair_attempts") .and_then(|v| v.as_u64()) @@ -167,7 +161,7 @@ impl Tool for GenerateObjectTool { schema, schema_name, schema_description, - mode: resolved_mode, + mode, max_repair_attempts, }; diff --git a/core/tests/test_structured_json_real_llm.rs b/core/tests/test_structured_json_real_llm.rs new file mode 100644 index 0000000..072ef10 --- /dev/null +++ b/core/tests/test_structured_json_real_llm.rs @@ -0,0 +1,262 @@ +//! Real-LLM validation of the JSON-object-generation stability fix. +//! +//! Exercises the changed code paths against the live provider configured in +//! `.a3s/config.acl`: +//! * forced-`tool_choice` structured generation (Tool mode) — the core fix, +//! run repeatedly to prove stability; +//! * native `response_format` (Json / Strict modes) on the OpenAI-compatible +//! provider; +//! * the hardened planner pre-analysis JSON path (`LlmPlanner::pre_analyze`). +//! +//! `#[ignore]` — requires a live provider in `.a3s/config.acl` and network +//! access to it. Run: +//! +//! ```bash +//! A3S_CONFIG_FILE=/abs/path/.a3s/config.acl \ +//! cargo test -p a3s-code-core --test test_structured_json_real_llm -- --ignored --nocapture +//! ``` + +use std::path::PathBuf; +use std::sync::Arc; +use std::time::Duration; + +use a3s_code_core::config::CodeConfig; +use a3s_code_core::llm::structured::{ + generate_blocking, generate_streaming, PartialObjectCallback, StructuredMode, + StructuredRequest, StructuredResult, +}; +use a3s_code_core::llm::{create_client_with_config, LlmClient}; +use a3s_code_core::planning::LlmPlanner; +use serde_json::{json, Value}; + +/// Hard ceiling per LLM call so a flaky/hung endpoint fails the test fast +/// instead of stalling for minutes. +const CALL_TIMEOUT: Duration = Duration::from_secs(90); + +fn repo_config_path() -> PathBuf { + std::env::var_os("A3S_CONFIG_FILE") + .map(PathBuf::from) + .unwrap_or_else(|| { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("../../..") + .join(".a3s/config.acl") + }) +} + +/// Build a client from `.a3s/config.acl`. By default uses the config's +/// `default_model`; set `A3S_TEST_MODEL=provider/model` to target a specific +/// model (e.g. a tool-capable one for structured-output tests). +fn real_client() -> Arc { + let path = repo_config_path(); + let config = CodeConfig::from_file(&path) + .unwrap_or_else(|e| panic!("failed to load {}: {e}", path.display())); + + let llm_config = match std::env::var("A3S_TEST_MODEL") { + Ok(spec) => { + let (provider, model) = spec + .split_once('/') + .expect("A3S_TEST_MODEL must be 'provider/model'"); + eprintln!("[real-llm] model = {spec} (from {})", path.display()); + config + .llm_config(provider, model) + .unwrap_or_else(|| panic!("model {spec} not found in {}", path.display())) + } + Err(_) => { + eprintln!("[real-llm] model = (from {})", path.display()); + config + .default_llm_config() + .expect("default llm config in .a3s/config.acl") + } + }; + create_client_with_config(llm_config) +} + +/// Run a structured generation with a hard timeout. +async fn gen_with_timeout( + client: &dyn LlmClient, + req: &StructuredRequest, +) -> anyhow::Result { + match tokio::time::timeout(CALL_TIMEOUT, generate_blocking(client, req)).await { + Ok(res) => res, + Err(_) => anyhow::bail!("LLM call exceeded {CALL_TIMEOUT:?}"), + } +} + +/// A non-trivial nested schema — the kind of object whose generation users +/// reported as unstable. +fn person_schema() -> Value { + json!({ + "type": "object", + "required": ["name", "age", "skills"], + "additionalProperties": false, + "properties": { + "name": { "type": "string" }, + "age": { "type": "integer" }, + "skills": { "type": "array", "items": { "type": "string" } }, + "address": { + "type": "object", + "properties": { "city": { "type": "string" } } + } + } + }) +} + +fn person_request(mode: StructuredMode) -> StructuredRequest { + StructuredRequest { + prompt: "Extract a structured person profile from this text: \ + 'Alice is 30 years old, a Rust and Python developer living in Berlin.'" + .to_string(), + system: None, + schema: person_schema(), + schema_name: "person".to_string(), + schema_description: Some("A person profile".to_string()), + mode, + max_repair_attempts: 2, + } +} + +fn assert_valid_person(object: &Value) { + assert!( + object["name"].is_string(), + "name must be a string, got {object}" + ); + assert!( + object["age"].is_i64() || object["age"].is_u64(), + "age must be an integer, got {object}" + ); + assert!( + object["skills"].is_array(), + "skills must be an array, got {object}" + ); +} + +/// The core fix: forced `tool_choice` structured generation must be STABLE. +/// Run several independent times and require every one to yield a valid, +/// schema-conforming object. +#[tokio::test(flavor = "multi_thread")] +#[ignore = "requires real provider credentials and network access"] +async fn real_structured_tool_mode_is_stable() { + let client = real_client(); + const RUNS: usize = 5; + let mut total_repairs = 0u32; + + for i in 0..RUNS { + let result = gen_with_timeout(client.as_ref(), &person_request(StructuredMode::Tool)) + .await + .unwrap_or_else(|e| panic!("run {i}: forced-tool structured generation failed: {e}")); + + assert_eq!( + result.mode_used, + StructuredMode::Tool, + "run {i}: expected forced Tool mode" + ); + assert_valid_person(&result.object); + total_repairs += result.repair_rounds as u32; + eprintln!( + "[tool] run {i}: ok (repairs={}) -> {}", + result.repair_rounds, result.object + ); + } + + eprintln!( + "[tool] {RUNS}/{RUNS} runs produced valid objects; total repair rounds = {total_repairs}" + ); +} + +/// The streaming structured path must also force `tool_choice` and yield a +/// valid object (the streaming counterpart of the core fix). +#[tokio::test(flavor = "multi_thread")] +#[ignore = "requires real provider credentials and network access"] +async fn real_structured_tool_mode_streaming() { + let client = real_client(); + let partials = std::sync::Arc::new(std::sync::Mutex::new(0usize)); + let partials_cb = partials.clone(); + let on_partial: PartialObjectCallback = Box::new(move |_p| { + *partials_cb.lock().unwrap() += 1; + }); + + let result = tokio::time::timeout( + CALL_TIMEOUT, + generate_streaming( + client.as_ref(), + &person_request(StructuredMode::Tool), + on_partial, + ), + ) + .await + .expect("streaming call timed out") + .expect("streaming tool-mode generation failed"); + + assert_eq!(result.mode_used, StructuredMode::Tool); + assert_valid_person(&result.object); + eprintln!( + "[tool-stream] partials={} -> {}", + *partials.lock().unwrap(), + result.object + ); +} + +/// Native `response_format: json_object` on the OpenAI-compatible provider. +#[tokio::test(flavor = "multi_thread")] +#[ignore = "requires real provider credentials and network access"] +async fn real_structured_json_mode() { + let client = real_client(); + let result = gen_with_timeout(client.as_ref(), &person_request(StructuredMode::Json)) + .await + .expect("json_object structured generation failed"); + assert_valid_person(&result.object); + eprintln!( + "[json] mode_used={:?} repairs={} -> {}", + result.mode_used, result.repair_rounds, result.object + ); +} + +/// Native `response_format: json_schema` (strict). Some providers reject schemas +/// that don't meet their strict subset, so this is tolerant: it must either +/// succeed with a valid object or fail cleanly (never hang or mis-parse). +#[tokio::test(flavor = "multi_thread")] +#[ignore = "requires real provider credentials and network access"] +async fn real_structured_strict_mode() { + let client = real_client(); + match gen_with_timeout(client.as_ref(), &person_request(StructuredMode::Strict)).await { + Ok(result) => { + assert_valid_person(&result.object); + eprintln!( + "[strict] ok mode_used={:?} repairs={} -> {}", + result.mode_used, result.repair_rounds, result.object + ); + } + Err(e) => { + // Acceptable: provider may not support strict json_schema for this + // schema. The point is that it fails cleanly, not silently wrong. + eprintln!("[strict] provider rejected native json_schema (acceptable): {e}"); + } + } +} + +/// The hardened planner pre-analysis path against a real model: the response +/// must parse into a `PreAnalysis` (robust extractor + one repair retry). +#[tokio::test(flavor = "multi_thread")] +#[ignore = "requires real provider credentials and network access"] +async fn real_pre_analyze_parses() { + let client = real_client(); + let analysis = tokio::time::timeout( + CALL_TIMEOUT, + LlmPlanner::pre_analyze( + &client, + "Refactor the auth module in src/auth.rs to use async/await, and keep the public API stable.", + ), + ) + .await + .expect("pre_analyze timed out") + .expect("pre_analyze should parse a real model's JSON response"); + + assert!( + !analysis.optimized_input.trim().is_empty(), + "optimized_input should be populated" + ); + eprintln!( + "[pre_analyze] requires_planning={} optimized_input={:?}", + analysis.requires_planning, analysis.optimized_input + ); +}