From fb25ea88fc177f6a0d1e2edace3150edea89e2e4 Mon Sep 17 00:00:00 2001 From: Ruben Fiszel Date: Sat, 27 Jun 2026 21:23:43 +0200 Subject: [PATCH 1/4] feat: reusable AI agent steps with hybrid linking and evals Co-Authored-By: Claude Opus 4.8 (1M context) --- ...6f6a9252200e32280b2c6fa49a58ae2cb676f.json | 23 + ...0627124035_ai_agent_resource_type.down.sql | 1 + ...260627124035_ai_agent_resource_type.up.sql | 41 ++ backend/windmill-ai/src/types.rs | 89 ++++ backend/windmill-api/openapi.yaml | 73 +++ backend/windmill-api/src/ai_agents.rs | 502 ++++++++++++++++++ backend/windmill-api/src/lib.rs | 2 + backend/windmill-types/src/flows.rs | 13 +- backend/windmill-worker/src/ai_executor.rs | 72 ++- .../windmill-worker/src/worker_lockfiles.rs | 2 + docs/reusable-ai-agents.md | 36 ++ .../components/flows/agentResourceUtils.ts | 115 ++++ .../flows/content/AgentEvalsPanel.svelte | 369 +++++++++++++ .../flows/content/AgentResourceBar.svelte | 181 +++++++ .../flows/content/FlowModuleComponent.svelte | 52 +- openflow.openapi.yaml | 7 + 16 files changed, 1568 insertions(+), 10 deletions(-) create mode 100644 backend/.sqlx/query-8c08eb619da44fd89d5f6f787b96f6a9252200e32280b2c6fa49a58ae2cb676f.json create mode 100644 backend/migrations/20260627124035_ai_agent_resource_type.down.sql create mode 100644 backend/migrations/20260627124035_ai_agent_resource_type.up.sql create mode 100644 backend/windmill-api/src/ai_agents.rs create mode 100644 docs/reusable-ai-agents.md create mode 100644 frontend/src/lib/components/flows/agentResourceUtils.ts create mode 100644 frontend/src/lib/components/flows/content/AgentEvalsPanel.svelte create mode 100644 frontend/src/lib/components/flows/content/AgentResourceBar.svelte diff --git a/backend/.sqlx/query-8c08eb619da44fd89d5f6f787b96f6a9252200e32280b2c6fa49a58ae2cb676f.json b/backend/.sqlx/query-8c08eb619da44fd89d5f6f787b96f6a9252200e32280b2c6fa49a58ae2cb676f.json new file mode 100644 index 0000000000000..63c335a09c6d5 --- /dev/null +++ b/backend/.sqlx/query-8c08eb619da44fd89d5f6f787b96f6a9252200e32280b2c6fa49a58ae2cb676f.json @@ -0,0 +1,23 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT value FROM resource WHERE workspace_id = $1 AND path = $2 AND resource_type = 'ai_agent'", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "value", + "type_info": "Jsonb" + } + ], + "parameters": { + "Left": [ + "Text", + "Text" + ] + }, + "nullable": [ + true + ] + }, + "hash": "8c08eb619da44fd89d5f6f787b96f6a9252200e32280b2c6fa49a58ae2cb676f" +} diff --git a/backend/migrations/20260627124035_ai_agent_resource_type.down.sql b/backend/migrations/20260627124035_ai_agent_resource_type.down.sql new file mode 100644 index 0000000000000..cfe9c91cae7f4 --- /dev/null +++ b/backend/migrations/20260627124035_ai_agent_resource_type.down.sql @@ -0,0 +1 @@ +DELETE FROM resource_type WHERE workspace_id = 'admins' AND name = 'ai_agent'; diff --git a/backend/migrations/20260627124035_ai_agent_resource_type.up.sql b/backend/migrations/20260627124035_ai_agent_resource_type.up.sql new file mode 100644 index 0000000000000..4d18883df4509 --- /dev/null +++ b/backend/migrations/20260627124035_ai_agent_resource_type.up.sql @@ -0,0 +1,41 @@ +-- Built-in `ai_agent` resource type backing reusable AI agent steps. +-- A resource of this type stores an agent's brain (provider/model/system prompt/etc.), +-- its tool set, and its eval suite. Flow steps link to it via FlowModuleValue::AIAgent.agent. +-- +-- Seeded into the `admins` workspace: list_resource_types unions `workspace_id = 'admins'`, +-- so this single row is visible from every workspace (existing and future), mirroring how +-- hub-synced built-in types (e.g. s3object) are made globally available. +INSERT INTO resource_type (workspace_id, name, schema, description, edited_at) VALUES + ('admins', 'ai_agent', '{ + "type": "object", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "required": ["provider"], + "properties": { + "provider": { + "type": "object", + "format": "ai-provider", + "description": "AI provider + model + credentials resource for the agent." + }, + "system_prompt": { "type": "string", "description": "System prompt for the agent." }, + "temperature": { "type": "number", "description": "Sampling temperature (0.0-2.0)." }, + "max_completion_tokens": { "type": "number", "description": "Maximum output tokens." }, + "max_iterations": { "type": "number", "description": "Max reasoning/tool-use loops." }, + "output_type": { "type": "string", "enum": ["text", "image"], "default": "text" }, + "output_schema": { "type": "object", "format": "json-schema", "description": "Structured-output JSON schema." }, + "streaming": { "type": "boolean" }, + "memory": { "type": "object", "description": "Conversation memory config (off/auto/manual)." }, + "tools": { "type": "array", "description": "Reusable tool definitions available to the agent." }, + "evals": { + "type": "object", + "description": "Eval suite: cases graded by deterministic assertions and/or an LLM judge.", + "properties": { + "cases": { "type": "array" }, + "judge": { "type": "object" } + } + } + } + }'::jsonb, + 'A reusable AI agent: provider/model, system prompt, tools and an eval suite. Referenced by AI agent flow steps.', + now()) +ON CONFLICT (workspace_id, name) DO UPDATE + SET schema = EXCLUDED.schema, description = EXCLUDED.description, edited_at = now(); diff --git a/backend/windmill-ai/src/types.rs b/backend/windmill-ai/src/types.rs index 8f864468dbcbb..2bd2e6017f8ed 100644 --- a/backend/windmill-ai/src/types.rs +++ b/backend/windmill-ai/src/types.rs @@ -160,6 +160,95 @@ impl From for AIAgentArgs { } } +// =========================================================================== +// Reusable AI agent — eval suite types +// +// An `ai_agent` resource stores an `AIAgentConfig` (brain + tools + evals). The +// brain fields mirror `AIAgentArgsRaw` and are merged into `AIAgentArgs` at +// runtime via a plain JSON merge, so they are not re-declared here. The types +// below model the eval suite, which the judge/run endpoints inspect directly. +// =========================================================================== + +/// One eval case stored under an `ai_agent` resource's `evals.cases`. +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct AgentEvalCase { + pub id: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub name: Option, + pub input: EvalInput, + /// LLM-judge acceptance criteria (each a single bullet the output must satisfy). + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub judge_checklist: Vec, + /// Deterministic checks evaluated without an LLM. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub assertions: Vec, +} + +#[derive(Serialize, Deserialize, Debug, Clone, Default)] +pub struct EvalInput { + #[serde(default, skip_serializing_if = "Option::is_none")] + pub user_message: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub user_attachments: Option>, +} + +/// Deterministic, LLM-free check run against an agent's output. +#[derive(Serialize, Deserialize, Debug, Clone)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum Assertion { + Contains { + value: String, + #[serde(default)] + case_sensitive: bool, + }, + NotContains { + value: String, + #[serde(default)] + case_sensitive: bool, + }, + Regex { + pattern: String, + }, + /// JSONPath-style dotted path into a structured output equals the given value. + JsonPathEquals { + path: String, + value: serde_json::Value, + }, + /// Output validates against the agent's configured `output_schema`. + OutputSchemaValid, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct AssertionResult { + pub assertion: Assertion, + pub passed: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub detail: Option, +} + +/// LLM-judge verdict for one case (0-100 score + pass/fail + rationale). +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct JudgeResult { + pub score: u8, + pub pass: bool, + pub summary: String, +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct EvalCaseResult { + pub case_id: String, + pub passed: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub output: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub assertions: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub judge: Option, + pub latency_ms: u64, +} + #[derive(Deserialize, Debug)] pub struct ProviderResource { #[serde(alias = "apiKey", default, deserialize_with = "empty_string_as_none")] diff --git a/backend/windmill-api/openapi.yaml b/backend/windmill-api/openapi.yaml index e9c57c3fb0523..5108c37c801c2 100644 --- a/backend/windmill-api/openapi.yaml +++ b/backend/windmill-api/openapi.yaml @@ -6768,6 +6768,79 @@ paths: schema: type: string + /w/{workspace}/ai_agents/run: + post: + summary: run a saved AI agent once on an input + operationId: runAiAgent + tags: + - ai_agent + parameters: + - $ref: "#/components/parameters/WorkspaceId" + requestBody: + description: agent resource path and input + required: true + content: + application/json: + schema: + type: object + required: + - agent + properties: + agent: + type: string + description: Path of a saved ai_agent resource + input: + type: object + properties: + user_message: + type: string + user_attachments: + type: array + items: + type: object + responses: + "200": + description: agent output + content: + application/json: + schema: {} + + /w/{workspace}/ai_agents/eval_case: + post: + summary: run a single eval case against a saved AI agent + operationId: evalAiAgentCase + tags: + - ai_agent + parameters: + - $ref: "#/components/parameters/WorkspaceId" + requestBody: + description: agent resource path, eval case, and optional judge provider + required: true + content: + application/json: + schema: + type: object + required: + - agent + - case + properties: + agent: + type: string + description: Path of a saved ai_agent resource + case: + type: object + description: AgentEvalCase (id, input, judge_checklist, assertions) + judge_provider: + type: object + description: Optional ai-provider override for the judge; defaults to the agent's provider + responses: + "200": + description: eval case result (pass/fail, judge verdict, assertion results, output) + content: + application/json: + schema: + type: object + /w/{workspace}/resources/delete/{path}: delete: summary: delete resource diff --git a/backend/windmill-api/src/ai_agents.rs b/backend/windmill-api/src/ai_agents.rs new file mode 100644 index 0000000000000..3871f514f08ca --- /dev/null +++ b/backend/windmill-api/src/ai_agents.rs @@ -0,0 +1,502 @@ +//! Endpoints for reusable AI agents (the `ai_agent` resource type). +//! +//! Agents are run by pushing a single-module flow-preview job whose one step is an +//! `AIAgent` module. For a saved agent that step is *linked* (`agent: Some(path)`), so the +//! brain config + tools resolve at runtime from the resource via the same hybrid-linking path +//! the flow executor uses. The eval judge is itself run as an inline `AIAgent` step with a +//! structured `output_schema`, so the whole feature reuses the existing job machinery. + +use std::collections::HashMap; +use std::time::Instant; + +use axum::{extract::Path, routing::post, Extension, Json, Router}; +use serde::Deserialize; +use serde_json::value::RawValue; + +use windmill_ai::types::{ + AgentEvalCase, Assertion, AssertionResult, EvalCaseResult, EvalInput, JudgeResult, +}; +use windmill_common::error::{self, Error}; +use windmill_common::flows::{FlowModule, FlowModuleValue, FlowValue, InputTransform}; +use windmill_common::jobs::JobPayload; +use windmill_common::worker::to_raw_value; +use windmill_queue::{push, PushArgs, PushIsolationLevel}; + +use crate::db::{ApiAuthed, DB}; +use crate::jobs::run_wait_result_internal; +use crate::utils::check_scopes; +use windmill_common::db::UserDB; +use windmill_common::users::username_to_permissioned_as; + +pub fn workspaced_service() -> Router { + Router::new() + .route("/run", post(run_agent)) + .route("/eval_case", post(eval_case)) +} + +#[derive(Deserialize)] +struct RunAgentRequest { + /// Path of a saved `ai_agent` resource. + agent: String, + #[serde(default)] + input: EvalInput, +} + +#[derive(Deserialize)] +struct EvalCaseRequest { + /// Path of a saved `ai_agent` resource. + agent: String, + case: AgentEvalCase, + /// Optional judge provider (ai-provider shape, may contain nested `$res:`). Defaults to the + /// agent's own provider when omitted. + #[serde(default)] + judge_provider: Option>, +} + +/// Run a saved agent once on a given input and return its raw output. +async fn run_agent( + authed: ApiAuthed, + Extension(db): Extension, + Extension(user_db): Extension, + Path(w_id): Path, + Json(req): Json, +) -> error::Result>> { + check_scopes(&authed, || format!("jobs:run"))?; + let flow = build_linked_agent_flow(&req.agent, &req.input); + let (output, success, _ms) = + run_preview(&db, &user_db, &authed, &w_id, flow).await?; + if !success { + return Err(Error::ExecutionErr(format!( + "agent run failed: {}", + output.get() + ))); + } + Ok(Json(output)) +} + +/// Run a single eval case against a saved agent: execute, apply deterministic assertions, and +/// (if the case has a checklist) grade with an LLM judge. +async fn eval_case( + authed: ApiAuthed, + Extension(db): Extension, + Extension(user_db): Extension, + Path(w_id): Path, + Json(req): Json, +) -> error::Result> { + check_scopes(&authed, || format!("jobs:run"))?; + let case = req.case; + + // 1. Run the agent under test. + let started = Instant::now(); + let flow = build_linked_agent_flow(&req.agent, &case.input); + let run = run_preview(&db, &user_db, &authed, &w_id, flow).await; + let latency_ms = started.elapsed().as_millis() as u64; + + let (output, success) = match run { + Ok((output, success, _)) => (output, success), + Err(e) => { + return Ok(Json(EvalCaseResult { + case_id: case.id, + passed: false, + output: None, + error: Some(e.to_string()), + assertions: vec![], + judge: None, + latency_ms, + })); + } + }; + + if !success { + return Ok(Json(EvalCaseResult { + case_id: case.id, + passed: false, + output: Some(output.clone()), + error: Some(format!("agent run failed: {}", output.get())), + assertions: vec![], + judge: None, + latency_ms, + })); + } + + // 2. Deterministic assertions. + let assertion_results: Vec = case + .assertions + .iter() + .map(|a| evaluate_assertion(a, &output)) + .collect(); + let assertions_pass = assertion_results.iter().all(|r| r.passed); + + // 3. LLM judge (only when a checklist is provided). + let judge = if case.judge_checklist.is_empty() { + None + } else { + let provider = match req.judge_provider { + Some(p) => p, + None => agent_provider(&db, &w_id, &req.agent).await?, + }; + match run_judge(&db, &user_db, &authed, &w_id, provider, &output, &case.judge_checklist) + .await + { + Ok(j) => Some(j), + Err(e) => Some(JudgeResult { + score: 0, + pass: false, + summary: format!("judge failed: {e}"), + }), + } + }; + + let passed = assertions_pass && judge.as_ref().map(|j| j.pass).unwrap_or(true); + + Ok(Json(EvalCaseResult { + case_id: case.id, + passed, + output: Some(output), + error: None, + assertions: assertion_results, + judge, + latency_ms, + })) +} + +// --------------------------------------------------------------------------- +// Flow construction +// --------------------------------------------------------------------------- + +fn static_transform(value: &T) -> InputTransform { + InputTransform::new_static_value(to_raw_value(value)) +} + +fn single_module_flow(value: FlowModuleValue) -> FlowValue { + FlowValue { + modules: vec![FlowModule { + id: "a".to_string(), + value: to_raw_value(&value), + ..Default::default() + }], + ..Default::default() + } +} + +/// A one-step flow whose AIAgent module links to the saved agent resource. Only the flow-local +/// inputs are set locally; the brain + tools resolve from the resource at runtime. +fn build_linked_agent_flow(agent_path: &str, input: &EvalInput) -> FlowValue { + let mut input_transforms = HashMap::new(); + if let Some(msg) = &input.user_message { + input_transforms.insert("user_message".to_string(), static_transform(msg)); + } + if let Some(att) = &input.user_attachments { + input_transforms.insert("user_attachments".to_string(), static_transform(att)); + } + single_module_flow(FlowModuleValue::AIAgent { + input_transforms, + tools: vec![], + tag: None, + omit_output_from_conversation: false, + agent: Some(agent_path.to_string()), + }) +} + +const JUDGE_SYSTEM_PROMPT: &str = "You are a strict evaluator. You are given the OUTPUT produced \ +by an AI agent and a CHECKLIST of acceptance criteria. Judge whether the output satisfies every \ +criterion. Respond ONLY via the structured output: `score` is an integer 0-100 reflecting overall \ +quality against the checklist, `pass` is true only if all criteria are satisfied, and `summary` \ +is one or two sentences explaining the verdict."; + +/// Run the judge as an inline AIAgent step with a structured output schema. +async fn run_judge( + db: &DB, + user_db: &UserDB, + authed: &ApiAuthed, + w_id: &str, + provider: Box, + output: &RawValue, + checklist: &[String], +) -> error::Result { + let checklist_rendered = checklist + .iter() + .map(|c| format!("- {c}")) + .collect::>() + .join("\n"); + let user_message = format!( + "OUTPUT:\n{}\n\nCHECKLIST:\n{}", + output.get(), + checklist_rendered + ); + + let output_schema = serde_json::json!({ + "type": "object", + "properties": { + "score": { "type": "integer", "description": "0-100 overall quality" }, + "pass": { "type": "boolean", "description": "true only if all criteria pass" }, + "summary": { "type": "string", "description": "short rationale" } + }, + "required": ["score", "pass", "summary"] + }); + + let mut it = HashMap::new(); + it.insert("provider".to_string(), InputTransform::new_static_value(provider)); + it.insert("system_prompt".to_string(), static_transform(&JUDGE_SYSTEM_PROMPT)); + it.insert("user_message".to_string(), static_transform(&user_message)); + it.insert("output_type".to_string(), static_transform(&"text")); + it.insert("output_schema".to_string(), static_transform(&output_schema)); + it.insert("max_iterations".to_string(), static_transform(&1)); + + let flow = single_module_flow(FlowModuleValue::AIAgent { + input_transforms: it, + tools: vec![], + tag: None, + omit_output_from_conversation: false, + agent: None, + }); + + let (result, success, _) = run_preview(db, user_db, authed, w_id, flow).await?; + if !success { + return Err(Error::ExecutionErr(format!( + "judge run failed: {}", + result.get() + ))); + } + parse_judge_result(&result) +} + +/// Lenient parse: tolerate float scores and out-of-range values from the model. +fn parse_judge_result(value: &RawValue) -> error::Result { + let v: serde_json::Value = serde_json::from_str(value.get()) + .map_err(|e| Error::internal_err(format!("judge returned non-JSON output: {e}")))?; + let score = v + .get("score") + .and_then(|s| s.as_f64()) + .map(|s| s.round().clamp(0.0, 100.0) as u8) + .unwrap_or(0); + let pass = v.get("pass").and_then(|p| p.as_bool()).unwrap_or(false); + let summary = v + .get("summary") + .and_then(|s| s.as_str()) + .unwrap_or_default() + .to_string(); + Ok(JudgeResult { score, pass, summary }) +} + +/// Read the saved agent's `provider` field to use as the default judge provider. +async fn agent_provider(db: &DB, w_id: &str, path: &str) -> error::Result> { + let value = sqlx::query_scalar!( + "SELECT value FROM resource WHERE workspace_id = $1 AND path = $2 AND resource_type = 'ai_agent'", + w_id, + path + ) + .fetch_optional(db) + .await? + .flatten() + .ok_or_else(|| Error::NotFound(format!("ai_agent resource {path} not found")))?; + + value + .get("provider") + .map(to_raw_value) + .ok_or_else(|| { + Error::BadRequest(format!( + "ai_agent resource {path} has no provider; pass judge_provider explicitly" + )) + }) +} + +// --------------------------------------------------------------------------- +// Job push + wait +// --------------------------------------------------------------------------- + +/// Push a single-step flow preview and block until it completes. Returns (result, success, ms). +async fn run_preview( + db: &DB, + user_db: &UserDB, + authed: &ApiAuthed, + w_id: &str, + flow: FlowValue, +) -> error::Result<(Box, bool, u64)> { + let started = Instant::now(); + let args: HashMap> = HashMap::new(); + let tx = PushIsolationLevel::Isolated(user_db.clone(), authed.clone().into()); + + let (uuid, tx) = push( + db, + tx, + w_id, + JobPayload::RawFlow { value: flow, path: Some("ai_agent/eval".to_string()), restarted_from: None }, + PushArgs::from(&args), + authed.display_username(), + &authed.email, + username_to_permissioned_as(&authed.username), + authed.token_prefix.as_deref(), + None, + None, + None, + None, + None, + None, + false, + false, + None, + true, + None, + None, + None, + None, + Some(&authed.clone().into()), + false, + None, + None, + None, + ) + .await?; + tx.commit().await?; + + let (result, success) = + run_wait_result_internal(db, uuid, w_id, None, false, &authed.username).await?; + Ok((result, success, started.elapsed().as_millis() as u64)) +} + +// --------------------------------------------------------------------------- +// Deterministic assertions +// --------------------------------------------------------------------------- + +/// Render an agent output as a plain string for text-based assertions. A JSON string output is +/// unwrapped; anything else uses its compact JSON text. +fn output_as_string(output: &RawValue) -> String { + match serde_json::from_str::(output.get()) { + Ok(serde_json::Value::String(s)) => s, + _ => output.get().to_string(), + } +} + +fn evaluate_assertion(assertion: &Assertion, output: &RawValue) -> AssertionResult { + let (passed, detail) = match assertion { + Assertion::Contains { value, case_sensitive } => { + let (haystack, needle) = casefold(output_as_string(output), value, *case_sensitive); + (haystack.contains(&needle), None) + } + Assertion::NotContains { value, case_sensitive } => { + let (haystack, needle) = casefold(output_as_string(output), value, *case_sensitive); + (!haystack.contains(&needle), None) + } + Assertion::Regex { pattern } => match regex::Regex::new(pattern) { + Ok(re) => (re.is_match(&output_as_string(output)), None), + Err(e) => (false, Some(format!("invalid regex: {e}"))), + }, + Assertion::JsonPathEquals { path, value } => { + match serde_json::from_str::(output.get()) { + Ok(json) => { + let found = json_path_get(&json, path); + (found == Some(value), found.map(|f| f.to_string())) + } + Err(e) => (false, Some(format!("output is not JSON: {e}"))), + } + } + Assertion::OutputSchemaValid => { + // v1: a basic structural check that the output is valid, non-null JSON. A full + // schema validation against the agent's output_schema is a fast-follow. + match serde_json::from_str::(output.get()) { + Ok(serde_json::Value::Null) | Err(_) => (false, None), + Ok(_) => (true, None), + } + } + }; + AssertionResult { assertion: assertion.clone(), passed, detail } +} + +fn casefold(haystack: String, needle: &str, case_sensitive: bool) -> (String, String) { + if case_sensitive { + (haystack, needle.to_string()) + } else { + (haystack.to_lowercase(), needle.to_lowercase()) + } +} + +/// Navigate a dotted JSON path (e.g. `data.items.0.name`) into a value. +fn json_path_get<'a>(value: &'a serde_json::Value, path: &str) -> Option<&'a serde_json::Value> { + let mut current = value; + for segment in path.split('.') { + current = match current { + serde_json::Value::Object(map) => map.get(segment)?, + serde_json::Value::Array(arr) => arr.get(segment.parse::().ok()?)?, + _ => return None, + }; + } + Some(current) +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + fn raw(s: &str) -> Box { + RawValue::from_string(s.to_string()).unwrap() + } + + #[test] + fn output_as_string_unwraps_json_strings() { + assert_eq!(output_as_string(&raw("\"hello\"")), "hello"); + assert_eq!(output_as_string(&raw("{\"a\":1}")), "{\"a\":1}"); + assert_eq!(output_as_string(&raw("42")), "42"); + } + + #[test] + fn contains_assertion_respects_case_sensitivity() { + let out = raw("\"Hello World\""); + let sensitive = Assertion::Contains { value: "hello".into(), case_sensitive: true }; + assert!(!evaluate_assertion(&sensitive, &out).passed); + let insensitive = Assertion::Contains { value: "hello".into(), case_sensitive: false }; + assert!(evaluate_assertion(&insensitive, &out).passed); + } + + #[test] + fn not_contains_and_regex_and_schema_assertions() { + let out = raw("\"order 123 shipped\""); + assert!(evaluate_assertion( + &Assertion::NotContains { value: "refund".into(), case_sensitive: false }, + &out + ) + .passed); + assert!(evaluate_assertion(&Assertion::Regex { pattern: r"order \d+".into() }, &out).passed); + assert!(!evaluate_assertion(&Assertion::Regex { pattern: "(".into() }, &out).passed); + assert!(evaluate_assertion(&Assertion::OutputSchemaValid, &raw("{\"a\":1}")).passed); + assert!(!evaluate_assertion(&Assertion::OutputSchemaValid, &raw("null")).passed); + } + + #[test] + fn json_path_navigates_objects_and_arrays() { + let v = json!({"data": {"items": [{"name": "a"}, {"name": "b"}]}}); + assert_eq!(json_path_get(&v, "data.items.1.name"), Some(&json!("b"))); + assert_eq!(json_path_get(&v, "data.missing"), None); + assert_eq!(json_path_get(&v, "data.items.9"), None); + } + + #[test] + fn json_path_equals_assertion() { + let out = raw("{\"status\":\"ok\",\"count\":3}"); + assert!(evaluate_assertion( + &Assertion::JsonPathEquals { path: "status".into(), value: json!("ok") }, + &out + ) + .passed); + assert!(!evaluate_assertion( + &Assertion::JsonPathEquals { path: "count".into(), value: json!(5) }, + &out + ) + .passed); + } + + #[test] + fn parse_judge_result_is_lenient() { + // Float score is rounded and clamped; pass/summary parsed. + let j = parse_judge_result(&raw("{\"score\": 87.6, \"pass\": true, \"summary\": \"good\"}")) + .unwrap(); + assert_eq!(j.score, 88); + assert!(j.pass); + assert_eq!(j.summary, "good"); + // Missing fields default safely. + let j2 = parse_judge_result(&raw("{\"score\": 200}")).unwrap(); + assert_eq!(j2.score, 100); + assert!(!j2.pass); + } +} diff --git a/backend/windmill-api/src/lib.rs b/backend/windmill-api/src/lib.rs index cbbb406db4fc8..12a47821fd58f 100644 --- a/backend/windmill-api/src/lib.rs +++ b/backend/windmill-api/src/lib.rs @@ -66,6 +66,7 @@ use crate::scim_oss::has_scim_token; use windmill_common::error::AppError; mod ai; +mod ai_agents; mod ai_skills; mod apps; pub mod args; @@ -621,6 +622,7 @@ pub async fn run_server( Router::new() }) .nest("/ai", ai::workspaced_service()) + .nest("/ai_agents", ai_agents::workspaced_service()) .nest("/ai_skills", ai_skills::workspaced_service()) .nest("/npm_proxy", windmill_api_npm_proxy::workspaced_service()) .nest( diff --git a/backend/windmill-types/src/flows.rs b/backend/windmill-types/src/flows.rs index 006185dbd9184..bed0502c80d2f 100644 --- a/backend/windmill-types/src/flows.rs +++ b/backend/windmill-types/src/flows.rs @@ -1000,6 +1000,11 @@ pub enum FlowModuleValue { tag: Option, #[serde(default, skip_serializing_if = "is_false")] omit_output_from_conversation: bool, + /// When set, the agent brain config (provider/model/system prompt/etc.) and tools are + /// resolved at runtime from this `ai_agent` resource path (hybrid linking). The module's + /// `input_transforms` then only carry the flow-local inputs (user_message/user_attachments). + #[serde(default, skip_serializing_if = "Option::is_none")] + agent: Option, }, } @@ -1035,6 +1040,7 @@ struct UntaggedFlowModuleValue { assets: Option>, tools: Option>, omit_output_from_conversation: Option, + agent: Option, pass_flow_input_directly: Option, squash: Option, #[serde(flatten)] @@ -1133,13 +1139,14 @@ impl<'de> Deserialize<'de> for FlowModuleValue { "identity" => Ok(FlowModuleValue::Identity), "aiagent" => Ok(FlowModuleValue::AIAgent { input_transforms: untagged.input_transforms.unwrap_or_default(), - tools: untagged - .tools - .ok_or_else(|| serde::de::Error::missing_field("tools"))?, + // Tools default to empty: a linked agent (see `agent`) resolves its tools from + // the referenced resource, so the module itself may carry none. + tools: untagged.tools.unwrap_or_default(), tag: untagged.tag, omit_output_from_conversation: untagged .omit_output_from_conversation .unwrap_or(false), + agent: untagged.agent, }), other => Err(serde::de::Error::unknown_variant( other, diff --git a/backend/windmill-worker/src/ai_executor.rs b/backend/windmill-worker/src/ai_executor.rs index 2591cc649527a..93d9fe3685cbb 100644 --- a/backend/windmill-worker/src/ai_executor.rs +++ b/backend/windmill-worker/src/ai_executor.rs @@ -35,7 +35,7 @@ use windmill_common::{ error::{self, Error}, flow_conversations::MessageType, flow_status::AgentAction, - flows::{FlowModule, FlowModuleValue, ToolValue}, + flows::{AgentTool, FlowModule, FlowModuleValue, ToolValue}, get_latest_hash_for_path, jobs::JobKind, scripts::get_full_hub_script_by_path, @@ -156,14 +156,20 @@ pub async fn handle_ai_agent_job( has_stream: &mut bool, ) -> Result, Error> { // build_args_map returns None if no $res:/$var: transforms needed, in which case use original args - let args = match build_args_map(job, client, conn).await? { + let local_args = match build_args_map(job, client, conn).await? { Some(transformed) => transformed, None => job.args.as_ref().map(|a| a.0.clone()).unwrap_or_default(), }; - let args = serde_json::from_str::(&serde_json::to_string(&args)?)?; - // Handle dry_run mode - check credentials without making API calls - if args.credentials_check { + // Handle dry_run mode - check credentials without making API calls. + // The credentials check is always invoked inline (provider present, no agent link and no + // parent flow), so it resolves before any flow/agent-resource context is fetched. + let is_credentials_check = local_args + .get("credentials_check") + .map(|v| v.get().trim() == "true") + .unwrap_or(false); + if is_credentials_check { + let args = serde_json::from_str::(&serde_json::to_string(&local_args)?)?; return handle_credentials_check(&args.provider).await; } @@ -249,7 +255,7 @@ pub async fn handle_ai_agent_job( let summary = module.summary.clone(); - let FlowModuleValue::AIAgent { tools, omit_output_from_conversation, .. } = + let FlowModuleValue::AIAgent { tools: module_tools, omit_output_from_conversation, agent, .. } = module.get_value()? else { return Err(Error::internal_err( @@ -257,6 +263,60 @@ pub async fn handle_ai_agent_job( )); }; + // Hybrid linking: when the step references a saved `ai_agent` resource, the brain config + // (provider/model/system prompt/etc.) and the tool set come from that resource; only the + // flow-local inputs (user_message/user_attachments) come from the step. When not linked, the + // brain is the step's resolved input_transforms and the tools are the module's own. + // v1 boundary: a linked agent's tools come wholly from the resource; per-tool flow-context + // wiring is a fast-follow. + let (args, tools): (AIAgentArgs, Vec) = if let Some(agent_ref) = agent.as_deref() { + let agent_path = agent_ref + .trim_start_matches("$res:") + .trim_start_matches("res://"); + let resource_value = client + .get_resource_value_interpolated::( + agent_path, + Some(job.id.to_string()), + ) + .await + .map_err(|e| { + Error::internal_err(format!("failed to load ai_agent resource {agent_path}: {e}")) + })?; + let mut config = match resource_value { + serde_json::Value::Object(map) => map, + _ => { + return Err(Error::internal_err(format!( + "ai_agent resource {agent_path} must be a JSON object" + ))) + } + }; + // Overlay flow-local inputs onto the agent brain. + for key in ["user_message", "user_attachments"] { + if let Some(v) = local_args.get(key) { + config.insert( + key.to_string(), + serde_json::from_str(v.get()).unwrap_or(serde_json::Value::Null), + ); + } + } + let tools = match config.remove("tools") { + Some(t) => serde_json::from_value::>(t).map_err(|e| { + Error::internal_err(format!("invalid tools in ai_agent resource {agent_path}: {e}")) + })?, + None => Vec::new(), + }; + let args = serde_json::from_value::(serde_json::Value::Object(config)) + .map_err(|e| { + Error::internal_err(format!( + "invalid ai_agent resource config {agent_path}: {e}" + )) + })?; + (args, tools) + } else { + let args = serde_json::from_str::(&serde_json::to_string(&local_args)?)?; + (args, module_tools) + }; + // Separate Windmill tools from MCP tools, websearch, and extract MCP resource configs let mut windmill_modules: Vec = Vec::new(); #[allow(unused_mut)] diff --git a/backend/windmill-worker/src/worker_lockfiles.rs b/backend/windmill-worker/src/worker_lockfiles.rs index d98a2cd2e06f8..e63d2260dfa8a 100644 --- a/backend/windmill-worker/src/worker_lockfiles.rs +++ b/backend/windmill-worker/src/worker_lockfiles.rs @@ -1250,6 +1250,7 @@ async fn lock_modules( mut tools, tag, omit_output_from_conversation, + agent, } => { // Extract FlowModules from tools and track their original indices // MCP tools don't need locking, so we filter them out @@ -1302,6 +1303,7 @@ async fn lock_modules( tools, tag, omit_output_from_conversation, + agent, } .into(); } diff --git a/docs/reusable-ai-agents.md b/docs/reusable-ai-agents.md new file mode 100644 index 0000000000000..94b3a824d179b --- /dev/null +++ b/docs/reusable-ai-agents.md @@ -0,0 +1,36 @@ +# Reusable AI Agents + +An AI agent flow step can be saved as a **reusable agent** — a resource of the built-in +`ai_agent` resource type that bundles the agent's brain (provider/model, system prompt, +temperature, output schema, memory…), its tool set, and an **eval suite**. Other flows can +link to the same agent, and edits to the agent propagate to every linked step. + +## Hybrid linking + +`FlowModuleValue::AIAgent` has an optional `agent` field holding the resource path. When set: + +- The brain config and tools are resolved at runtime from the resource + (`windmill-worker/src/ai_executor.rs`, via `get_resource_value_interpolated` — nested + provider `$res:` credentials resolve automatically). +- The flow step keeps only the flow-local inputs (`user_message`, `user_attachments`) in its + `input_transforms`. +- v1 boundary: a linked agent's tools come wholly from the resource (per-tool flow-context + wiring is a fast-follow). + +In the flow editor, the AI agent step's **Step Input** tab shows a bar to *Save as agent*, +*Use saved agent* (picker), or *Unlink* (snapshots the resolved config back into the step). + +## Evals + +A saved agent carries `evals.cases`. Each case has an input message, an optional LLM-judge +checklist, and optional deterministic assertions (`contains` / `not_contains` / `regex` / +`json_path_equals` / `output_schema_valid`). The **Evals** tab authors and runs them. + +Cases run for real (they cost tokens): the backend pushes a single-step AIAgent flow-preview +job and grades the output. The judge itself runs as an inline AIAgent step with a structured +output schema (`{score, pass, summary}`), defaulting to the agent's own provider. + +- `POST /w/{workspace}/ai_agents/run` — run a saved agent once on an input. +- `POST /w/{workspace}/ai_agents/eval_case` — run one eval case (execute + assertions + judge). + +Sharing works through standard resource folder permissions (save agents under `f/...`). diff --git a/frontend/src/lib/components/flows/agentResourceUtils.ts b/frontend/src/lib/components/flows/agentResourceUtils.ts new file mode 100644 index 0000000000000..def2b5c5b12c2 --- /dev/null +++ b/frontend/src/lib/components/flows/agentResourceUtils.ts @@ -0,0 +1,115 @@ +import type { InputTransform } from '$lib/gen' + +// The brain fields stored flat in an `ai_agent` resource value. The flow-local inputs +// (user_message/user_attachments) are intentionally excluded — they are supplied per-flow. +export const AGENT_BRAIN_KEYS = [ + 'provider', + 'output_type', + 'system_prompt', + 'streaming', + 'memory', + 'output_schema', + 'max_completion_tokens', + 'temperature', + 'max_iterations' +] as const + +export const AGENT_FLOW_LOCAL_KEYS = ['user_message', 'user_attachments'] as const + +export type AgentTool = Record + +export type AgentAssertion = + | { kind: 'contains'; value: string; case_sensitive?: boolean } + | { kind: 'not_contains'; value: string; case_sensitive?: boolean } + | { kind: 'regex'; pattern: string } + | { kind: 'json_path_equals'; path: string; value: unknown } + | { kind: 'output_schema_valid' } + +export interface AgentEvalCase { + id: string + name?: string + input: { user_message?: string; user_attachments?: unknown[] } + judge_checklist?: string[] + assertions?: AgentAssertion[] +} + +export interface AgentEvalSuite { + cases: AgentEvalCase[] + judge?: unknown +} + +/** Brain keys whose step transform is non-static and would be dropped by a save-as-agent snapshot. */ +export function nonStaticBrainKeys( + inputTransforms: Record | undefined +): string[] { + return AGENT_BRAIN_KEYS.filter((key) => { + const t = inputTransforms?.[key] as any + return t && t.type !== 'static' + }) +} + +export interface AIAgentConfig { + provider?: unknown + output_type?: string + system_prompt?: string + streaming?: boolean + memory?: unknown + output_schema?: unknown + max_completion_tokens?: number + temperature?: number + max_iterations?: number + tools?: AgentTool[] + evals?: AgentEvalSuite +} + +/** Extract the static brain values from a step's input_transforms into a flat agent config. */ +export function inputTransformsToAgentConfig( + inputTransforms: Record | undefined, + tools: AgentTool[] | undefined, + evals?: AgentEvalSuite +): AIAgentConfig { + const config: AIAgentConfig = { tools: tools ?? [], evals: evals ?? { cases: [] } } + for (const key of AGENT_BRAIN_KEYS) { + const t = inputTransforms?.[key] as any + if (t && t.type === 'static' && t.value !== undefined) { + ;(config as any)[key] = t.value + } + } + return config +} + +/** + * Reduce the AI agent schema to only the flow-local inputs. Used when a step is linked to a saved + * agent: the brain fields come from the resource, so only user_message/user_attachments stay editable. + */ +export function flowLocalAgentSchema(schema: any): any { + if (!schema?.properties) { + return schema + } + const properties: Record = {} + for (const key of AGENT_FLOW_LOCAL_KEYS) { + if (schema.properties[key]) { + properties[key] = schema.properties[key] + } + } + return { + ...schema, + properties, + order: (schema.order ?? Object.keys(properties)).filter((k: string) => k in properties), + required: (schema.required ?? []).filter((k: string) => k in properties) + } +} + +/** Inverse: wrap brain config values as static input_transforms (used when unlinking a step). */ +export function agentConfigToInputTransforms( + config: AIAgentConfig +): Record { + const it: Record = {} + for (const key of AGENT_BRAIN_KEYS) { + const v = (config as any)[key] + if (v !== undefined) { + it[key] = { type: 'static', value: v } as InputTransform + } + } + return it +} diff --git a/frontend/src/lib/components/flows/content/AgentEvalsPanel.svelte b/frontend/src/lib/components/flows/content/AgentEvalsPanel.svelte new file mode 100644 index 0000000000000..c426994823470 --- /dev/null +++ b/frontend/src/lib/components/flows/content/AgentEvalsPanel.svelte @@ -0,0 +1,369 @@ + + +
+
+ Evals + {#if ranCount > 0} + {passedCount}/{ranCount} passed + {/if} +
+ + + +
+
+ + {#if loading} +
+ Loading… +
+ {:else if cases.length === 0} +

+ No eval cases yet. Add a case with an input message, then grade it with a judge checklist + and/or deterministic assertions. +

+ {:else} + {#each cases as c (c.id)} + {@const r = results[c.id]} +
+
+ + {#if r === 'running'} + + {:else if r} + {#if r.passed} + + Pass + + {:else} + + Fail + + {/if} + {#if r.judge} + judge {r.judge.score} + {/if} + {r.latency_ms}ms + {/if} + +
+ + + + + +
+ Assertions + {#each c.assertions ?? [] as a, i (i)} +
+ {a.kind} + {#if a.kind === 'contains' || a.kind === 'not_contains'} + + {:else if a.kind === 'regex'} + + {:else if a.kind === 'json_path_equals'} + + { + try { + a.value = JSON.parse(e.currentTarget.value) + } catch { + a.value = e.currentTarget.value + } + }} + placeholder="expected value" + /> + {:else} + output is valid non-null JSON + {/if} +
+ {/each} +
+
+ + +
+ {#snippet actions()} + + {/snippet} + + diff --git a/frontend/src/lib/components/flows/content/FlowModuleComponent.svelte b/frontend/src/lib/components/flows/content/FlowModuleComponent.svelte index 46a5c78d329d9..2fcf6fbce2b77 100644 --- a/frontend/src/lib/components/flows/content/FlowModuleComponent.svelte +++ b/frontend/src/lib/components/flows/content/FlowModuleComponent.svelte @@ -36,6 +36,9 @@ import FlowModuleSleep from './FlowModuleSleep.svelte' import FlowPathViewer from './FlowPathViewer.svelte' import InputTransformSchemaForm from '$lib/components/InputTransformSchemaForm.svelte' + import AgentResourceBar from './AgentResourceBar.svelte' + import AgentEvalsPanel from './AgentEvalsPanel.svelte' + import { flowLocalAgentSchema } from '../agentResourceUtils' import FlowModuleMockTransitionMessage from './FlowModuleMockTransitionMessage.svelte' import Tooltip from '$lib/components/Tooltip.svelte' import { SecondsInput } from '$lib/components/common' @@ -164,6 +167,9 @@ flowModule.value.type === 'aiagent' ) let visibleSelected = $derived(selected === 'chat' && !canShowChatTab ? 'inputs' : selected) + let agentLinked = $derived( + flowModule.value.type === 'aiagent' && Boolean((flowModule.value as any).agent) + ) let advancedSelected = $state('retries') let advancedRuntimeSelected = $state('concurrency') let s3Kind = $state('s3_client') @@ -1028,12 +1034,43 @@ label="Chat" /> {/if} + {#if flowModule.value.type === 'aiagent' && !isAgentTool} + + {/if} {#if !preprocessorModule && !isAgentTool} {/if} {#if visibleSelected === 'inputs' && (flowModule.value.type == 'rawscript' || flowModule.value.type == 'script' || flowModule.value.type == 'flow' || flowModule.value.type == 'aiagent')}
+ {#if flowModule.value.type === 'aiagent'} + (flowModule.value as any).agent, + (v) => { + if (flowModule.value.type === 'aiagent') { + ;(flowModule.value as any).agent = v + } + } + } + bind:inputTransforms={ + () => (flowModule.value as any).input_transforms, + (v) => { + if (flowModule.value.type === 'aiagent') { + ;(flowModule.value as any).input_transforms = v + } + } + } + bind:tools={ + () => (flowModule.value as any).tools ?? [], + (v) => { + if (flowModule.value.type === 'aiagent') { + ;(flowModule.value as any).tools = v + } + } + } + /> + {/if} { @@ -1127,6 +1166,17 @@ />
+ {:else if visibleSelected === 'evals' && flowModule.value.type === 'aiagent'} +
+ {#if agentLinked && (flowModule.value as any).agent} + + {:else} +
+ Save this step as a reusable agent (in the Step Input tab) to author and + run evals against it. +
+ {/if} +
{:else if visibleSelected === 'advanced'} Date: Sun, 28 Jun 2026 13:11:56 +0200 Subject: [PATCH 2/4] feat: make linked AI agents rigid (read-only) with unlink-to-fork Co-Authored-By: Claude Opus 4.8 (1M context) --- backend/windmill-worker/src/ai_executor.rs | 15 +++- .../flows/content/AgentResourceBar.svelte | 87 ++++++++++++++++--- .../graph/renderers/nodes/AIToolNode.svelte | 5 +- 3 files changed, 92 insertions(+), 15 deletions(-) diff --git a/backend/windmill-worker/src/ai_executor.rs b/backend/windmill-worker/src/ai_executor.rs index 93d9fe3685cbb..5e6900fd7e713 100644 --- a/backend/windmill-worker/src/ai_executor.rs +++ b/backend/windmill-worker/src/ai_executor.rs @@ -255,8 +255,9 @@ pub async fn handle_ai_agent_job( let summary = module.summary.clone(); - let FlowModuleValue::AIAgent { tools: module_tools, omit_output_from_conversation, agent, .. } = - module.get_value()? + let FlowModuleValue::AIAgent { + tools: module_tools, omit_output_from_conversation, agent, .. + } = module.get_value()? else { return Err(Error::internal_err( "AI agent module is not an AI agent".to_string(), @@ -280,7 +281,9 @@ pub async fn handle_ai_agent_job( ) .await .map_err(|e| { - Error::internal_err(format!("failed to load ai_agent resource {agent_path}: {e}")) + Error::internal_err(format!( + "failed to load ai_agent resource {agent_path}: {e}" + )) })?; let mut config = match resource_value { serde_json::Value::Object(map) => map, @@ -299,9 +302,13 @@ pub async fn handle_ai_agent_job( ); } } + // A linked agent is rigid: brain and tools come wholly from the resource. To diverge, the + // step must be unlinked (forked), at which point the config is copied into the step. let tools = match config.remove("tools") { Some(t) => serde_json::from_value::>(t).map_err(|e| { - Error::internal_err(format!("invalid tools in ai_agent resource {agent_path}: {e}")) + Error::internal_err(format!( + "invalid tools in ai_agent resource {agent_path}: {e}" + )) })?, None => Vec::new(), }; diff --git a/frontend/src/lib/components/flows/content/AgentResourceBar.svelte b/frontend/src/lib/components/flows/content/AgentResourceBar.svelte index 9ef75d2f3d490..fdf9a5e863926 100644 --- a/frontend/src/lib/components/flows/content/AgentResourceBar.svelte +++ b/frontend/src/lib/components/flows/content/AgentResourceBar.svelte @@ -5,7 +5,7 @@ import { ResourceService, type InputTransform } from '$lib/gen' import { workspaceStore } from '$lib/stores' import { sendUserToast } from '$lib/toast' - import { Bot, Save, Unlink, Pencil } from 'lucide-svelte' + import { Bot, Save, Unlink, Pencil, TriangleAlert } from 'lucide-svelte' import { AGENT_FLOW_LOCAL_KEYS, agentConfigToInputTransforms, @@ -14,6 +14,7 @@ type AIAgentConfig, type AgentTool } from '../agentResourceUtils' + import { resource } from 'runed' let { agent = $bindable(), @@ -34,13 +35,53 @@ let saving = $state(false) let pickerValue: string | undefined = $state(undefined) - // Link the step as soon as a saved agent is picked. + type LinkedInfo = { tools: AgentTool[]; providerPath?: string; providerOk: boolean } + + // A linked agent is rigid and read-only: its brain, tools and evals come from the resource. We + // load them here for display, and probe the provider resource so we can warn when it isn't + // accessible in this workspace (the user then needs to unlink/fork or gain access). + let linkedResource = resource( + () => ({ ws: $workspaceStore, path: agent }), + async ({ ws, path }): Promise => { + if (!ws || !path) { + return { tools: [], providerOk: true } + } + const res = await ResourceService.getResource({ workspace: ws, path }) + const cfg = (res.value ?? {}) as AIAgentConfig & { provider?: { resource?: string } } + const tools = (cfg.tools ?? []) as AgentTool[] + const providerRef = cfg.provider?.resource + const providerPath = + typeof providerRef === 'string' && providerRef + ? providerRef.replace(/^\$res:/, '').replace(/^res:\/\//, '') + : undefined + let providerOk = true + if (providerPath) { + try { + await ResourceService.getResource({ workspace: ws, path: providerPath }) + } catch { + providerOk = false + } + } + return { tools, providerPath, providerOk } + } + ) + let inheritedTools = $derived(linkedResource.current?.tools ?? []) + let providerPath = $derived(linkedResource.current?.providerPath) + let providerOk = $derived(linkedResource.current?.providerOk ?? true) + + // Link the step as soon as a saved agent is picked. Linking is rigid, so the step keeps no tools + // of its own — they come from the resource. $effect(() => { if (pickerValue && pickerValue !== agent) { agent = pickerValue + tools = [] } }) + function toolLabel(tool: AgentTool): string { + return tool.summary || tool.value?.tool_type || tool.id + } + function openSave() { newPath = '' pathError = '' @@ -73,6 +114,8 @@ } }) agent = newPath + // The tools now live in the resource; a linked step keeps none of its own. + tools = [] saveDrawer?.closeDrawer() sendUserToast(`Saved reusable agent ${newPath}`) } catch (e) { @@ -82,6 +125,8 @@ } } + // Unlink forks the agent: the resource's brain + tools are copied into the step, which becomes a + // standalone, editable agent again. This is the only way to diverge from a saved agent. async function unlink() { if (!$workspaceStore || !agent) { return @@ -101,7 +146,7 @@ tools = cfg.tools ?? [] agent = undefined pickerValue = undefined - sendUserToast('Unlinked agent — config copied into the step') + sendUserToast('Forked agent — its configuration was copied into this step') } catch (e) { sendUserToast(`Failed to unlink agent: ${e}`, true) } @@ -128,9 +173,35 @@

- The agent brain and tools come from this resource. Only the message/inputs below are set in - this flow. + The brain, tools and evals come from this resource and can't be edited here. Only the + message/inputs below are set in this flow. Unlink to fork it into an editable step.

+ {#if inheritedTools.length > 0} +
+ Tools: + {#each inheritedTools as tool (tool.id)} + + {toolLabel(tool)} + + {/each} +
+ {/if} + {#if !providerOk} +
+ + + This agent's model provider{#if providerPath} + ({providerPath}){/if} isn't accessible in this workspace. + Unlink to fork the agent, or gain access to the provider resource{#if providerPath} + at {providerPath}{/if}. + +
+ {/if} {:else}
@@ -160,11 +231,7 @@ />
{#snippet actions()} diff --git a/frontend/src/lib/components/graph/renderers/nodes/AIToolNode.svelte b/frontend/src/lib/components/graph/renderers/nodes/AIToolNode.svelte index 5fd064c033b82..e01f522744563 100644 --- a/frontend/src/lib/components/graph/renderers/nodes/AIToolNode.svelte +++ b/frontend/src/lib/components/graph/renderers/nodes/AIToolNode.svelte @@ -200,7 +200,10 @@ allToolEdges.push(...(toolEdges ?? [])) allToolNodes.push(...(toolNodes ?? [])) - if (insertable) { + // A linked agent is rigid: its tools come from the resource and can't be edited here, so + // don't offer the "add tool" node (unlink/fork the step to change tools). + const isLinkedAgent = !!(node.data.module.value as { agent?: string }).agent + if (insertable && !isLinkedAgent) { allToolNodes.push({ type: 'newAiTool', data: { eventHandlers, agentModuleId: node.data.module.id }, From 47d1af016e7a0def0302997d474abee06c54d382 Mon Sep 17 00:00:00 2001 From: Ruben Fiszel Date: Sun, 28 Jun 2026 13:32:35 +0200 Subject: [PATCH 3/4] feat: show inherited agent config read-only on linked step Co-Authored-By: Claude Opus 4.8 (1M context) --- .../flows/agentResourceUtils.test.ts | 38 +++++++++++++ .../components/flows/agentResourceUtils.ts | 40 +++++++++++++ .../flows/content/AgentResourceBar.svelte | 56 +++++++++++++------ 3 files changed, 118 insertions(+), 16 deletions(-) create mode 100644 frontend/src/lib/components/flows/agentResourceUtils.test.ts diff --git a/frontend/src/lib/components/flows/agentResourceUtils.test.ts b/frontend/src/lib/components/flows/agentResourceUtils.test.ts new file mode 100644 index 0000000000000..18963e27cb340 --- /dev/null +++ b/frontend/src/lib/components/flows/agentResourceUtils.test.ts @@ -0,0 +1,38 @@ +import { describe, expect, it } from 'vitest' + +import { summarizeAgentBrain } from './agentResourceUtils' + +describe('summarizeAgentBrain', () => { + it('returns only set fields, in brain-key order, formatted', () => { + const rows = summarizeAgentBrain({ + provider: { kind: 'openai', model: 'gpt-4o', resource: '$res:f/x/openai' } as any, + system_prompt: 'You are helpful', + temperature: 0.7, + streaming: true, + max_iterations: 10 + }) + expect(rows).toEqual([ + { label: 'Model', value: 'openai · gpt-4o' }, + { label: 'System prompt', value: 'You are helpful' }, + { label: 'Streaming', value: 'on' }, + { label: 'Temperature', value: '0.7' }, + { label: 'Max iterations', value: '10' } + ]) + }) + + it('skips empty/undefined fields', () => { + expect(summarizeAgentBrain({ system_prompt: '', provider: undefined as any })).toEqual([]) + expect(summarizeAgentBrain(undefined)).toEqual([]) + }) + + it('summarizes structured fields compactly', () => { + const rows = summarizeAgentBrain({ + memory: { type: 'auto', context_length: 20 } as any, + output_schema: { type: 'object' } as any + }) + expect(rows).toEqual([ + { label: 'Memory', value: 'auto' }, + { label: 'Output schema', value: 'configured' } + ]) + }) +}) diff --git a/frontend/src/lib/components/flows/agentResourceUtils.ts b/frontend/src/lib/components/flows/agentResourceUtils.ts index def2b5c5b12c2..d6cfc7ec8718c 100644 --- a/frontend/src/lib/components/flows/agentResourceUtils.ts +++ b/frontend/src/lib/components/flows/agentResourceUtils.ts @@ -100,6 +100,46 @@ export function flowLocalAgentSchema(schema: any): any { } } +const AGENT_BRAIN_LABELS: Record = { + provider: 'Model', + output_type: 'Output type', + system_prompt: 'System prompt', + streaming: 'Streaming', + memory: 'Memory', + output_schema: 'Output schema', + max_completion_tokens: 'Max tokens', + temperature: 'Temperature', + max_iterations: 'Max iterations' +} + +/** Flatten a saved agent's brain config into human-readable label/value rows for a read-only + * display on a linked step. Only set fields are returned, in the canonical brain-key order. */ +export function summarizeAgentBrain( + config: AIAgentConfig | undefined +): { label: string; value: string }[] { + const rows: { label: string; value: string }[] = [] + for (const key of AGENT_BRAIN_KEYS) { + const v = (config as any)?.[key] + if (v === undefined || v === null || v === '') continue + let value: string + if (key === 'provider') { + value = [v.kind, v.model].filter(Boolean).join(' · ') || 'configured' + } else if (key === 'memory') { + value = typeof v === 'object' ? (v.type ?? 'configured') : String(v) + } else if (key === 'output_schema') { + value = 'configured' + } else if (typeof v === 'boolean') { + value = v ? 'on' : 'off' + } else if (typeof v === 'object') { + value = JSON.stringify(v) + } else { + value = String(v) + } + rows.push({ label: AGENT_BRAIN_LABELS[key] ?? key, value }) + } + return rows +} + /** Inverse: wrap brain config values as static input_transforms (used when unlinking a step). */ export function agentConfigToInputTransforms( config: AIAgentConfig diff --git a/frontend/src/lib/components/flows/content/AgentResourceBar.svelte b/frontend/src/lib/components/flows/content/AgentResourceBar.svelte index fdf9a5e863926..0719eb314f466 100644 --- a/frontend/src/lib/components/flows/content/AgentResourceBar.svelte +++ b/frontend/src/lib/components/flows/content/AgentResourceBar.svelte @@ -11,6 +11,7 @@ agentConfigToInputTransforms, inputTransformsToAgentConfig, nonStaticBrainKeys, + summarizeAgentBrain, type AIAgentConfig, type AgentTool } from '../agentResourceUtils' @@ -35,7 +36,12 @@ let saving = $state(false) let pickerValue: string | undefined = $state(undefined) - type LinkedInfo = { tools: AgentTool[]; providerPath?: string; providerOk: boolean } + type LinkedInfo = { + config: AIAgentConfig + tools: AgentTool[] + providerPath?: string + providerOk: boolean + } // A linked agent is rigid and read-only: its brain, tools and evals come from the resource. We // load them here for display, and probe the provider resource so we can warn when it isn't @@ -44,7 +50,7 @@ () => ({ ws: $workspaceStore, path: agent }), async ({ ws, path }): Promise => { if (!ws || !path) { - return { tools: [], providerOk: true } + return { config: {}, tools: [], providerOk: true } } const res = await ResourceService.getResource({ workspace: ws, path }) const cfg = (res.value ?? {}) as AIAgentConfig & { provider?: { resource?: string } } @@ -62,10 +68,11 @@ providerOk = false } } - return { tools, providerPath, providerOk } + return { config: cfg, tools, providerPath, providerOk } } ) let inheritedTools = $derived(linkedResource.current?.tools ?? []) + let brainParams = $derived(summarizeAgentBrain(linkedResource.current?.config)) let providerPath = $derived(linkedResource.current?.providerPath) let providerOk = $derived(linkedResource.current?.providerOk ?? true) @@ -173,20 +180,37 @@

- The brain, tools and evals come from this resource and can't be edited here. Only the - message/inputs below are set in this flow. Unlink to fork it into an editable step. + The configuration below comes from this saved agent and is read-only. Only the message/inputs + are set in this flow. Unlink to fork it into an editable step.

- {#if inheritedTools.length > 0} -
- Tools: - {#each inheritedTools as tool (tool.id)} - - {toolLabel(tool)} - - {/each} + {#if brainParams.length > 0 || inheritedTools.length > 0} +
+
+ From saved agent · read-only +
+
+ {#each brainParams as param (param.label)} +
+
{param.label}
+
{param.value}
+
+ {/each} + {#if inheritedTools.length > 0} +
+
Tools
+
+ {#each inheritedTools as tool (tool.id)} + + {toolLabel(tool)} + + {/each} +
+
+ {/if} +
{/if} {#if !providerOk} From 227dbf9a3e7c42f9eac5a9e322dc9c88054e02a3 Mon Sep 17 00:00:00 2001 From: Ruben Fiszel Date: Sun, 28 Jun 2026 13:49:04 +0200 Subject: [PATCH 4/4] feat: edit/update a saved agent in place via upsert Co-Authored-By: Claude Opus 4.8 (1M context) --- .../flows/content/AgentResourceBar.svelte | 184 +++++++++++++----- 1 file changed, 137 insertions(+), 47 deletions(-) diff --git a/frontend/src/lib/components/flows/content/AgentResourceBar.svelte b/frontend/src/lib/components/flows/content/AgentResourceBar.svelte index 0719eb314f466..72a487880bb9c 100644 --- a/frontend/src/lib/components/flows/content/AgentResourceBar.svelte +++ b/frontend/src/lib/components/flows/content/AgentResourceBar.svelte @@ -20,13 +20,11 @@ let { agent = $bindable(), inputTransforms = $bindable(), - tools = $bindable(), - onEdit + tools = $bindable() }: { agent: string | undefined inputTransforms: Record tools: AgentTool[] - onEdit?: () => void } = $props() let saveDrawer: Drawer | undefined = $state() @@ -35,6 +33,9 @@ let description = $state('') let saving = $state(false) let pickerValue: string | undefined = $state(undefined) + // Set when the step was forked from a saved agent for editing: it's now an editable standalone + // step, but "Save changes" upserts back to this path (and re-links), propagating to all flows. + let editingPath: string | undefined = $state(undefined) type LinkedInfo = { config: AIAgentConfig @@ -90,41 +91,63 @@ } function openSave() { - newPath = '' + newPath = editingPath ?? '' pathError = '' description = '' saveDrawer?.openDrawer() } - async function saveAsAgent() { - if (!$workspaceStore || pathError || !newPath) { - return + // Create or update the `ai_agent` resource at `path` from the step's current brain + tools, then + // link the step to it. Updating preserves the resource's existing eval suite (evals are edited + // in place on the linked step's Evals tab, not here). + async function persist(path: string, description?: string) { + const dropped = nonStaticBrainKeys(inputTransforms) + if (dropped.length > 0) { + sendUserToast( + `Note: ${dropped.join(', ')} use a computed/connected value and won't be saved into the agent`, + true + ) } - saving = true - try { - // Only static brain values are portable into a reusable agent; flag any that won't carry over. - const dropped = nonStaticBrainKeys(inputTransforms) - if (dropped.length > 0) { - sendUserToast( - `Note: ${dropped.join(', ')} use a computed/connected value and won't be saved into the agent`, - true - ) + const value = inputTransformsToAgentConfig(inputTransforms, tools) + const exists = await ResourceService.existsResource({ workspace: $workspaceStore!, path }) + if (exists) { + const cur = await ResourceService.getResource({ workspace: $workspaceStore!, path }) + const curEvals = (cur.value as AIAgentConfig | undefined)?.evals + if (curEvals) { + value.evals = curEvals } - const value = inputTransformsToAgentConfig(inputTransforms, tools) + await ResourceService.updateResourceValue({ + workspace: $workspaceStore!, + path, + requestBody: { value } + }) + } else { await ResourceService.createResource({ - workspace: $workspaceStore, + workspace: $workspaceStore!, requestBody: { - path: newPath, + path, value, resource_type: 'ai_agent', description: description || 'Reusable AI agent' } }) - agent = newPath - // The tools now live in the resource; a linked step keeps none of its own. - tools = [] + } + agent = path + // The brain + tools now live in the resource; a linked step keeps none of its own. + tools = [] + editingPath = undefined + } + + async function saveAsAgent() { + if (!$workspaceStore || pathError || !newPath) { + return + } + saving = true + try { + const updating = newPath === editingPath + await persist(newPath, description) saveDrawer?.closeDrawer() - sendUserToast(`Saved reusable agent ${newPath}`) + sendUserToast(updating ? `Updated agent ${newPath}` : `Saved reusable agent ${newPath}`) } catch (e) { sendUserToast(`Failed to save agent: ${e}`, true) } finally { @@ -132,32 +155,73 @@ } } - // Unlink forks the agent: the resource's brain + tools are copied into the step, which becomes a - // standalone, editable agent again. This is the only way to diverge from a saved agent. - async function unlink() { - if (!$workspaceStore || !agent) { + // Save the forked-for-edit step back to the agent it came from, updating it in place. + async function saveChanges() { + if (!$workspaceStore || !editingPath) { return } + saving = true + const path = editingPath try { - const res = await ResourceService.getResource({ workspace: $workspaceStore, path: agent }) - const cfg = (res.value ?? {}) as AIAgentConfig - const brain = agentConfigToInputTransforms(cfg) - // Preserve the flow-local inputs already wired in the step. - const local: Record = {} - for (const key of AGENT_FLOW_LOCAL_KEYS) { - if (inputTransforms?.[key]) { - local[key] = inputTransforms[key] - } + await persist(path) + sendUserToast(`Updated agent ${path}`) + } catch (e) { + sendUserToast(`Failed to update agent: ${e}`, true) + } finally { + saving = false + } + } + + // Copy the linked resource's brain + tools into the step, turning it back into a standalone + // editable agent. Shared by Unlink (diverge here) and Edit (edit the saved agent itself). + async function forkFromResource(): Promise { + if (!$workspaceStore || !agent) { + return undefined + } + const path = agent + const res = await ResourceService.getResource({ workspace: $workspaceStore, path }) + const cfg = (res.value ?? {}) as AIAgentConfig + const brain = agentConfigToInputTransforms(cfg) + // Preserve the flow-local inputs already wired in the step. + const local: Record = {} + for (const key of AGENT_FLOW_LOCAL_KEYS) { + if (inputTransforms?.[key]) { + local[key] = inputTransforms[key] + } + } + inputTransforms = { ...brain, ...local } + tools = cfg.tools ?? [] + agent = undefined + pickerValue = undefined + return path + } + + // Unlink forks the agent into this step so it can diverge here. It does not write back. + async function unlink() { + try { + const path = await forkFromResource() + if (path) { + editingPath = undefined + sendUserToast('Forked agent — its configuration was copied into this step') } - inputTransforms = { ...brain, ...local } - tools = cfg.tools ?? [] - agent = undefined - pickerValue = undefined - sendUserToast('Forked agent — its configuration was copied into this step') } catch (e) { sendUserToast(`Failed to unlink agent: ${e}`, true) } } + + // Edit the saved agent itself: fork it into the step for editing, remembering the path so + // "Save changes" writes back to it (updating every flow that links to it). + async function editAgent() { + try { + const path = await forkFromResource() + if (path) { + editingPath = path + sendUserToast(`Editing ${path} — make changes, then Save changes to update it`) + } + } catch (e) { + sendUserToast(`Failed to edit agent: ${e}`, true) + } + }
@@ -169,11 +233,9 @@ Linked to {agent}
- {#if onEdit} - - {/if} + @@ -181,7 +243,8 @@

The configuration below comes from this saved agent and is read-only. Only the message/inputs - are set in this flow. Unlink to fork it into an editable step. + are set in this flow. Edit to change the agent everywhere it's + used, or Unlink to fork an editable copy into just this step.

{#if brainParams.length > 0 || inheritedTools.length > 0}
@@ -226,6 +289,33 @@
{/if} + {:else if editingPath} +
+ + Editing + {editingPath} +
+ + +
+
+

+ Editing the saved agent. Save changes updates it and re-links this step — the update + propagates to every flow that links to it. Cancel keeps your edits here as a standalone step + instead. +

{:else}