diff --git a/backend/.sqlx/query-8c08eb619da44fd89d5f6f787b96f6a9252200e32280b2c6fa49a58ae2cb676f.json b/backend/.sqlx/query-8c08eb619da44fd89d5f6f787b96f6a9252200e32280b2c6fa49a58ae2cb676f.json
new file mode 100644
index 0000000000000..63c335a09c6d5
--- /dev/null
+++ b/backend/.sqlx/query-8c08eb619da44fd89d5f6f787b96f6a9252200e32280b2c6fa49a58ae2cb676f.json
@@ -0,0 +1,23 @@
+{
+  "db_name": "PostgreSQL",
+  "query": "SELECT value FROM resource WHERE workspace_id = $1 AND path = $2 AND resource_type = 'ai_agent'",
+  "describe": {
+    "columns": [
+      {
+        "ordinal": 0,
+        "name": "value",
+        "type_info": "Jsonb"
+      }
+    ],
+    "parameters": {
+      "Left": [
+        "Text",
+        "Text"
+      ]
+    },
+    "nullable": [
+      true
+    ]
+  },
+  "hash": "8c08eb619da44fd89d5f6f787b96f6a9252200e32280b2c6fa49a58ae2cb676f"
+}
diff --git a/backend/migrations/20260627124035_ai_agent_resource_type.down.sql b/backend/migrations/20260627124035_ai_agent_resource_type.down.sql
new file mode 100644
index 0000000000000..cfe9c91cae7f4
--- /dev/null
+++ b/backend/migrations/20260627124035_ai_agent_resource_type.down.sql
@@ -0,0 +1 @@
+DELETE FROM resource_type WHERE workspace_id = 'admins' AND name = 'ai_agent';
diff --git a/backend/migrations/20260627124035_ai_agent_resource_type.up.sql b/backend/migrations/20260627124035_ai_agent_resource_type.up.sql
new file mode 100644
index 0000000000000..4d18883df4509
--- /dev/null
+++ b/backend/migrations/20260627124035_ai_agent_resource_type.up.sql
@@ -0,0 +1,41 @@
+-- Built-in `ai_agent` resource type backing reusable AI agent steps.
+-- A resource of this type stores an agent's brain (provider/model/system prompt/etc.),
+-- its tool set, and its eval suite. Flow steps link to it via FlowModuleValue::AIAgent.agent.
+--
+-- Seeded into the `admins` workspace: list_resource_types unions `workspace_id = 'admins'`,
+-- so this single row is visible from every workspace (existing and future), mirroring how
+-- hub-synced built-in types (e.g. s3object) are made globally available.
+INSERT INTO resource_type (workspace_id, name, schema, description, edited_at) VALUES
+    ('admins', 'ai_agent', '{
+        "type": "object",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "required": ["provider"],
+        "properties": {
+            "provider": {
+                "type": "object",
+                "format": "ai-provider",
+                "description": "AI provider + model + credentials resource for the agent."
+            },
+            "system_prompt": { "type": "string", "description": "System prompt for the agent." },
+            "temperature": { "type": "number", "description": "Sampling temperature (0.0-2.0)." },
+            "max_completion_tokens": { "type": "number", "description": "Maximum output tokens." },
+            "max_iterations": { "type": "number", "description": "Max reasoning/tool-use loops." },
+            "output_type": { "type": "string", "enum": ["text", "image"], "default": "text" },
+            "output_schema": { "type": "object", "format": "json-schema", "description": "Structured-output JSON schema." },
+            "streaming": { "type": "boolean" },
+            "memory": { "type": "object", "description": "Conversation memory config (off/auto/manual)." },
+            "tools": { "type": "array", "description": "Reusable tool definitions available to the agent." },
+            "evals": {
+                "type": "object",
+                "description": "Eval suite: cases graded by deterministic assertions and/or an LLM judge.",
+                "properties": {
+                    "cases": { "type": "array" },
+                    "judge": { "type": "object" }
+                }
+            }
+        }
+    }'::jsonb,
+    'A reusable AI agent: provider/model, system prompt, tools and an eval suite. Referenced by AI agent flow steps.',
+    now())
+ON CONFLICT (workspace_id, name) DO UPDATE
+    SET schema = EXCLUDED.schema, description = EXCLUDED.description, edited_at = now();
diff --git a/backend/windmill-ai/src/types.rs b/backend/windmill-ai/src/types.rs
index 8f864468dbcbb..2bd2e6017f8ed 100644
--- a/backend/windmill-ai/src/types.rs
+++ b/backend/windmill-ai/src/types.rs
@@ -160,6 +160,95 @@ impl From<AIAgentArgsRaw> for AIAgentArgs {
     }
 }
 
+// ===========================================================================
+// Reusable AI agent — eval suite types
+//
+// An `ai_agent` resource stores an `AIAgentConfig` (brain + tools + evals). The
+// brain fields mirror `AIAgentArgsRaw` and are merged into `AIAgentArgs` at
+// runtime via a plain JSON merge, so they are not re-declared here. The types
+// below model the eval suite, which the judge/run endpoints inspect directly.
+// ===========================================================================
+
+/// One eval case stored under an `ai_agent` resource's `evals.cases`.
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct AgentEvalCase {
+    pub id: String,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub name: Option<String>,
+    pub input: EvalInput,
+    /// LLM-judge acceptance criteria (each a single bullet the output must satisfy).
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub judge_checklist: Vec<String>,
+    /// Deterministic checks evaluated without an LLM.
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub assertions: Vec<Assertion>,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, Default)]
+pub struct EvalInput {
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub user_message: Option<String>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub user_attachments: Option<Vec<S3Object>>,
+}
+
+/// Deterministic, LLM-free check run against an agent's output.
+#[derive(Serialize, Deserialize, Debug, Clone)]
+#[serde(tag = "kind", rename_all = "snake_case")]
+pub enum Assertion {
+    Contains {
+        value: String,
+        #[serde(default)]
+        case_sensitive: bool,
+    },
+    NotContains {
+        value: String,
+        #[serde(default)]
+        case_sensitive: bool,
+    },
+    Regex {
+        pattern: String,
+    },
+    /// JSONPath-style dotted path into a structured output equals the given value.
+    JsonPathEquals {
+        path: String,
+        value: serde_json::Value,
+    },
+    /// Output validates against the agent's configured `output_schema`.
+    OutputSchemaValid,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct AssertionResult {
+    pub assertion: Assertion,
+    pub passed: bool,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub detail: Option<String>,
+}
+
+/// LLM-judge verdict for one case (0-100 score + pass/fail + rationale).
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct JudgeResult {
+    pub score: u8,
+    pub pass: bool,
+    pub summary: String,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct EvalCaseResult {
+    pub case_id: String,
+    pub passed: bool,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub output: Option<Box<RawValue>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub error: Option<String>,
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub assertions: Vec<AssertionResult>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub judge: Option<JudgeResult>,
+    pub latency_ms: u64,
+}
+
 #[derive(Deserialize, Debug)]
 pub struct ProviderResource {
     #[serde(alias = "apiKey", default, deserialize_with = "empty_string_as_none")]
diff --git a/backend/windmill-api/openapi.yaml b/backend/windmill-api/openapi.yaml
index e9c57c3fb0523..5108c37c801c2 100644
--- a/backend/windmill-api/openapi.yaml
+++ b/backend/windmill-api/openapi.yaml
@@ -6768,6 +6768,79 @@ paths:
               schema:
                 type: string
 
+  /w/{workspace}/ai_agents/run:
+    post:
+      summary: run a saved AI agent once on an input
+      operationId: runAiAgent
+      tags:
+        - ai_agent
+      parameters:
+        - $ref: "#/components/parameters/WorkspaceId"
+      requestBody:
+        description: agent resource path and input
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              required:
+                - agent
+              properties:
+                agent:
+                  type: string
+                  description: Path of a saved ai_agent resource
+                input:
+                  type: object
+                  properties:
+                    user_message:
+                      type: string
+                    user_attachments:
+                      type: array
+                      items:
+                        type: object
+      responses:
+        "200":
+          description: agent output
+          content:
+            application/json:
+              schema: {}
+
+  /w/{workspace}/ai_agents/eval_case:
+    post:
+      summary: run a single eval case against a saved AI agent
+      operationId: evalAiAgentCase
+      tags:
+        - ai_agent
+      parameters:
+        - $ref: "#/components/parameters/WorkspaceId"
+      requestBody:
+        description: agent resource path, eval case, and optional judge provider
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              required:
+                - agent
+                - case
+              properties:
+                agent:
+                  type: string
+                  description: Path of a saved ai_agent resource
+                case:
+                  type: object
+                  description: AgentEvalCase (id, input, judge_checklist, assertions)
+                judge_provider:
+                  type: object
+                  description: Optional ai-provider override for the judge; defaults to the agent's provider
+      responses:
+        "200":
+          description: eval case result (pass/fail, judge verdict, assertion results, output)
+          content:
+            application/json:
+              schema:
+                type: object
+
   /w/{workspace}/resources/delete/{path}:
     delete:
       summary: delete resource
diff --git a/backend/windmill-api/src/ai_agents.rs b/backend/windmill-api/src/ai_agents.rs
new file mode 100644
index 0000000000000..3871f514f08ca
--- /dev/null
+++ b/backend/windmill-api/src/ai_agents.rs
@@ -0,0 +1,502 @@
+//! Endpoints for reusable AI agents (the `ai_agent` resource type).
+//!
+//! Agents are run by pushing a single-module flow-preview job whose one step is an
+//! `AIAgent` module. For a saved agent that step is *linked* (`agent: Some(path)`), so the
+//! brain config + tools resolve at runtime from the resource via the same hybrid-linking path
+//! the flow executor uses. The eval judge is itself run as an inline `AIAgent` step with a
+//! structured `output_schema`, so the whole feature reuses the existing job machinery.
+
+use std::collections::HashMap;
+use std::time::Instant;
+
+use axum::{extract::Path, routing::post, Extension, Json, Router};
+use serde::Deserialize;
+use serde_json::value::RawValue;
+
+use windmill_ai::types::{
+    AgentEvalCase, Assertion, AssertionResult, EvalCaseResult, EvalInput, JudgeResult,
+};
+use windmill_common::error::{self, Error};
+use windmill_common::flows::{FlowModule, FlowModuleValue, FlowValue, InputTransform};
+use windmill_common::jobs::JobPayload;
+use windmill_common::worker::to_raw_value;
+use windmill_queue::{push, PushArgs, PushIsolationLevel};
+
+use crate::db::{ApiAuthed, DB};
+use crate::jobs::run_wait_result_internal;
+use crate::utils::check_scopes;
+use windmill_common::db::UserDB;
+use windmill_common::users::username_to_permissioned_as;
+
+pub fn workspaced_service() -> Router {
+    Router::new()
+        .route("/run", post(run_agent))
+        .route("/eval_case", post(eval_case))
+}
+
+#[derive(Deserialize)]
+struct RunAgentRequest {
+    /// Path of a saved `ai_agent` resource.
+    agent: String,
+    #[serde(default)]
+    input: EvalInput,
+}
+
+#[derive(Deserialize)]
+struct EvalCaseRequest {
+    /// Path of a saved `ai_agent` resource.
+    agent: String,
+    case: AgentEvalCase,
+    /// Optional judge provider (ai-provider shape, may contain nested `$res:`). Defaults to the
+    /// agent's own provider when omitted.
+    #[serde(default)]
+    judge_provider: Option<Box<RawValue>>,
+}
+
+/// Run a saved agent once on a given input and return its raw output.
+async fn run_agent(
+    authed: ApiAuthed,
+    Extension(db): Extension<DB>,
+    Extension(user_db): Extension<UserDB>,
+    Path(w_id): Path<String>,
+    Json(req): Json<RunAgentRequest>,
+) -> error::Result<Json<Box<RawValue>>> {
+    check_scopes(&authed, || format!("jobs:run"))?;
+    let flow = build_linked_agent_flow(&req.agent, &req.input);
+    let (output, success, _ms) =
+        run_preview(&db, &user_db, &authed, &w_id, flow).await?;
+    if !success {
+        return Err(Error::ExecutionErr(format!(
+            "agent run failed: {}",
+            output.get()
+        )));
+    }
+    Ok(Json(output))
+}
+
+/// Run a single eval case against a saved agent: execute, apply deterministic assertions, and
+/// (if the case has a checklist) grade with an LLM judge.
+async fn eval_case(
+    authed: ApiAuthed,
+    Extension(db): Extension<DB>,
+    Extension(user_db): Extension<UserDB>,
+    Path(w_id): Path<String>,
+    Json(req): Json<EvalCaseRequest>,
+) -> error::Result<Json<EvalCaseResult>> {
+    check_scopes(&authed, || format!("jobs:run"))?;
+    let case = req.case;
+
+    // 1. Run the agent under test.
+    let started = Instant::now();
+    let flow = build_linked_agent_flow(&req.agent, &case.input);
+    let run = run_preview(&db, &user_db, &authed, &w_id, flow).await;
+    let latency_ms = started.elapsed().as_millis() as u64;
+
+    let (output, success) = match run {
+        Ok((output, success, _)) => (output, success),
+        Err(e) => {
+            return Ok(Json(EvalCaseResult {
+                case_id: case.id,
+                passed: false,
+                output: None,
+                error: Some(e.to_string()),
+                assertions: vec![],
+                judge: None,
+                latency_ms,
+            }));
+        }
+    };
+
+    if !success {
+        return Ok(Json(EvalCaseResult {
+            case_id: case.id,
+            passed: false,
+            output: Some(output.clone()),
+            error: Some(format!("agent run failed: {}", output.get())),
+            assertions: vec![],
+            judge: None,
+            latency_ms,
+        }));
+    }
+
+    // 2. Deterministic assertions.
+    let assertion_results: Vec<AssertionResult> = case
+        .assertions
+        .iter()
+        .map(|a| evaluate_assertion(a, &output))
+        .collect();
+    let assertions_pass = assertion_results.iter().all(|r| r.passed);
+
+    // 3. LLM judge (only when a checklist is provided).
+    let judge = if case.judge_checklist.is_empty() {
+        None
+    } else {
+        let provider = match req.judge_provider {
+            Some(p) => p,
+            None => agent_provider(&db, &w_id, &req.agent).await?,
+        };
+        match run_judge(&db, &user_db, &authed, &w_id, provider, &output, &case.judge_checklist)
+            .await
+        {
+            Ok(j) => Some(j),
+            Err(e) => Some(JudgeResult {
+                score: 0,
+                pass: false,
+                summary: format!("judge failed: {e}"),
+            }),
+        }
+    };
+
+    let passed = assertions_pass && judge.as_ref().map(|j| j.pass).unwrap_or(true);
+
+    Ok(Json(EvalCaseResult {
+        case_id: case.id,
+        passed,
+        output: Some(output),
+        error: None,
+        assertions: assertion_results,
+        judge,
+        latency_ms,
+    }))
+}
+
+// ---------------------------------------------------------------------------
+// Flow construction
+// ---------------------------------------------------------------------------
+
+fn static_transform<T: serde::Serialize>(value: &T) -> InputTransform {
+    InputTransform::new_static_value(to_raw_value(value))
+}
+
+fn single_module_flow(value: FlowModuleValue) -> FlowValue {
+    FlowValue {
+        modules: vec![FlowModule {
+            id: "a".to_string(),
+            value: to_raw_value(&value),
+            ..Default::default()
+        }],
+        ..Default::default()
+    }
+}
+
+/// A one-step flow whose AIAgent module links to the saved agent resource. Only the flow-local
+/// inputs are set locally; the brain + tools resolve from the resource at runtime.
+fn build_linked_agent_flow(agent_path: &str, input: &EvalInput) -> FlowValue {
+    let mut input_transforms = HashMap::new();
+    if let Some(msg) = &input.user_message {
+        input_transforms.insert("user_message".to_string(), static_transform(msg));
+    }
+    if let Some(att) = &input.user_attachments {
+        input_transforms.insert("user_attachments".to_string(), static_transform(att));
+    }
+    single_module_flow(FlowModuleValue::AIAgent {
+        input_transforms,
+        tools: vec![],
+        tag: None,
+        omit_output_from_conversation: false,
+        agent: Some(agent_path.to_string()),
+    })
+}
+
+const JUDGE_SYSTEM_PROMPT: &str = "You are a strict evaluator. You are given the OUTPUT produced \
+by an AI agent and a CHECKLIST of acceptance criteria. Judge whether the output satisfies every \
+criterion. Respond ONLY via the structured output: `score` is an integer 0-100 reflecting overall \
+quality against the checklist, `pass` is true only if all criteria are satisfied, and `summary` \
+is one or two sentences explaining the verdict.";
+
+/// Run the judge as an inline AIAgent step with a structured output schema.
+async fn run_judge(
+    db: &DB,
+    user_db: &UserDB,
+    authed: &ApiAuthed,
+    w_id: &str,
+    provider: Box<RawValue>,
+    output: &RawValue,
+    checklist: &[String],
+) -> error::Result<JudgeResult> {
+    let checklist_rendered = checklist
+        .iter()
+        .map(|c| format!("- {c}"))
+        .collect::<Vec<_>>()
+        .join("\n");
+    let user_message = format!(
+        "OUTPUT:\n{}\n\nCHECKLIST:\n{}",
+        output.get(),
+        checklist_rendered
+    );
+
+    let output_schema = serde_json::json!({
+        "type": "object",
+        "properties": {
+            "score": { "type": "integer", "description": "0-100 overall quality" },
+            "pass": { "type": "boolean", "description": "true only if all criteria pass" },
+            "summary": { "type": "string", "description": "short rationale" }
+        },
+        "required": ["score", "pass", "summary"]
+    });
+
+    let mut it = HashMap::new();
+    it.insert("provider".to_string(), InputTransform::new_static_value(provider));
+    it.insert("system_prompt".to_string(), static_transform(&JUDGE_SYSTEM_PROMPT));
+    it.insert("user_message".to_string(), static_transform(&user_message));
+    it.insert("output_type".to_string(), static_transform(&"text"));
+    it.insert("output_schema".to_string(), static_transform(&output_schema));
+    it.insert("max_iterations".to_string(), static_transform(&1));
+
+    let flow = single_module_flow(FlowModuleValue::AIAgent {
+        input_transforms: it,
+        tools: vec![],
+        tag: None,
+        omit_output_from_conversation: false,
+        agent: None,
+    });
+
+    let (result, success, _) = run_preview(db, user_db, authed, w_id, flow).await?;
+    if !success {
+        return Err(Error::ExecutionErr(format!(
+            "judge run failed: {}",
+            result.get()
+        )));
+    }
+    parse_judge_result(&result)
+}
+
+/// Lenient parse: tolerate float scores and out-of-range values from the model.
+fn parse_judge_result(value: &RawValue) -> error::Result<JudgeResult> {
+    let v: serde_json::Value = serde_json::from_str(value.get())
+        .map_err(|e| Error::internal_err(format!("judge returned non-JSON output: {e}")))?;
+    let score = v
+        .get("score")
+        .and_then(|s| s.as_f64())
+        .map(|s| s.round().clamp(0.0, 100.0) as u8)
+        .unwrap_or(0);
+    let pass = v.get("pass").and_then(|p| p.as_bool()).unwrap_or(false);
+    let summary = v
+        .get("summary")
+        .and_then(|s| s.as_str())
+        .unwrap_or_default()
+        .to_string();
+    Ok(JudgeResult { score, pass, summary })
+}
+
+/// Read the saved agent's `provider` field to use as the default judge provider.
+async fn agent_provider(db: &DB, w_id: &str, path: &str) -> error::Result<Box<RawValue>> {
+    let value = sqlx::query_scalar!(
+        "SELECT value FROM resource WHERE workspace_id = $1 AND path = $2 AND resource_type = 'ai_agent'",
+        w_id,
+        path
+    )
+    .fetch_optional(db)
+    .await?
+    .flatten()
+    .ok_or_else(|| Error::NotFound(format!("ai_agent resource {path} not found")))?;
+
+    value
+        .get("provider")
+        .map(to_raw_value)
+        .ok_or_else(|| {
+            Error::BadRequest(format!(
+                "ai_agent resource {path} has no provider; pass judge_provider explicitly"
+            ))
+        })
+}
+
+// ---------------------------------------------------------------------------
+// Job push + wait
+// ---------------------------------------------------------------------------
+
+/// Push a single-step flow preview and block until it completes. Returns (result, success, ms).
+async fn run_preview(
+    db: &DB,
+    user_db: &UserDB,
+    authed: &ApiAuthed,
+    w_id: &str,
+    flow: FlowValue,
+) -> error::Result<(Box<RawValue>, bool, u64)> {
+    let started = Instant::now();
+    let args: HashMap<String, Box<RawValue>> = HashMap::new();
+    let tx = PushIsolationLevel::Isolated(user_db.clone(), authed.clone().into());
+
+    let (uuid, tx) = push(
+        db,
+        tx,
+        w_id,
+        JobPayload::RawFlow { value: flow, path: Some("ai_agent/eval".to_string()), restarted_from: None },
+        PushArgs::from(&args),
+        authed.display_username(),
+        &authed.email,
+        username_to_permissioned_as(&authed.username),
+        authed.token_prefix.as_deref(),
+        None,
+        None,
+        None,
+        None,
+        None,
+        None,
+        false,
+        false,
+        None,
+        true,
+        None,
+        None,
+        None,
+        None,
+        Some(&authed.clone().into()),
+        false,
+        None,
+        None,
+        None,
+    )
+    .await?;
+    tx.commit().await?;
+
+    let (result, success) =
+        run_wait_result_internal(db, uuid, w_id, None, false, &authed.username).await?;
+    Ok((result, success, started.elapsed().as_millis() as u64))
+}
+
+// ---------------------------------------------------------------------------
+// Deterministic assertions
+// ---------------------------------------------------------------------------
+
+/// Render an agent output as a plain string for text-based assertions. A JSON string output is
+/// unwrapped; anything else uses its compact JSON text.
+fn output_as_string(output: &RawValue) -> String {
+    match serde_json::from_str::<serde_json::Value>(output.get()) {
+        Ok(serde_json::Value::String(s)) => s,
+        _ => output.get().to_string(),
+    }
+}
+
+fn evaluate_assertion(assertion: &Assertion, output: &RawValue) -> AssertionResult {
+    let (passed, detail) = match assertion {
+        Assertion::Contains { value, case_sensitive } => {
+            let (haystack, needle) = casefold(output_as_string(output), value, *case_sensitive);
+            (haystack.contains(&needle), None)
+        }
+        Assertion::NotContains { value, case_sensitive } => {
+            let (haystack, needle) = casefold(output_as_string(output), value, *case_sensitive);
+            (!haystack.contains(&needle), None)
+        }
+        Assertion::Regex { pattern } => match regex::Regex::new(pattern) {
+            Ok(re) => (re.is_match(&output_as_string(output)), None),
+            Err(e) => (false, Some(format!("invalid regex: {e}"))),
+        },
+        Assertion::JsonPathEquals { path, value } => {
+            match serde_json::from_str::<serde_json::Value>(output.get()) {
+                Ok(json) => {
+                    let found = json_path_get(&json, path);
+                    (found == Some(value), found.map(|f| f.to_string()))
+                }
+                Err(e) => (false, Some(format!("output is not JSON: {e}"))),
+            }
+        }
+        Assertion::OutputSchemaValid => {
+            // v1: a basic structural check that the output is valid, non-null JSON. A full
+            // schema validation against the agent's output_schema is a fast-follow.
+            match serde_json::from_str::<serde_json::Value>(output.get()) {
+                Ok(serde_json::Value::Null) | Err(_) => (false, None),
+                Ok(_) => (true, None),
+            }
+        }
+    };
+    AssertionResult { assertion: assertion.clone(), passed, detail }
+}
+
+fn casefold(haystack: String, needle: &str, case_sensitive: bool) -> (String, String) {
+    if case_sensitive {
+        (haystack, needle.to_string())
+    } else {
+        (haystack.to_lowercase(), needle.to_lowercase())
+    }
+}
+
+/// Navigate a dotted JSON path (e.g. `data.items.0.name`) into a value.
+fn json_path_get<'a>(value: &'a serde_json::Value, path: &str) -> Option<&'a serde_json::Value> {
+    let mut current = value;
+    for segment in path.split('.') {
+        current = match current {
+            serde_json::Value::Object(map) => map.get(segment)?,
+            serde_json::Value::Array(arr) => arr.get(segment.parse::<usize>().ok()?)?,
+            _ => return None,
+        };
+    }
+    Some(current)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use serde_json::json;
+
+    fn raw(s: &str) -> Box<RawValue> {
+        RawValue::from_string(s.to_string()).unwrap()
+    }
+
+    #[test]
+    fn output_as_string_unwraps_json_strings() {
+        assert_eq!(output_as_string(&raw("\"hello\"")), "hello");
+        assert_eq!(output_as_string(&raw("{\"a\":1}")), "{\"a\":1}");
+        assert_eq!(output_as_string(&raw("42")), "42");
+    }
+
+    #[test]
+    fn contains_assertion_respects_case_sensitivity() {
+        let out = raw("\"Hello World\"");
+        let sensitive = Assertion::Contains { value: "hello".into(), case_sensitive: true };
+        assert!(!evaluate_assertion(&sensitive, &out).passed);
+        let insensitive = Assertion::Contains { value: "hello".into(), case_sensitive: false };
+        assert!(evaluate_assertion(&insensitive, &out).passed);
+    }
+
+    #[test]
+    fn not_contains_and_regex_and_schema_assertions() {
+        let out = raw("\"order 123 shipped\"");
+        assert!(evaluate_assertion(
+            &Assertion::NotContains { value: "refund".into(), case_sensitive: false },
+            &out
+        )
+        .passed);
+        assert!(evaluate_assertion(&Assertion::Regex { pattern: r"order \d+".into() }, &out).passed);
+        assert!(!evaluate_assertion(&Assertion::Regex { pattern: "(".into() }, &out).passed);
+        assert!(evaluate_assertion(&Assertion::OutputSchemaValid, &raw("{\"a\":1}")).passed);
+        assert!(!evaluate_assertion(&Assertion::OutputSchemaValid, &raw("null")).passed);
+    }
+
+    #[test]
+    fn json_path_navigates_objects_and_arrays() {
+        let v = json!({"data": {"items": [{"name": "a"}, {"name": "b"}]}});
+        assert_eq!(json_path_get(&v, "data.items.1.name"), Some(&json!("b")));
+        assert_eq!(json_path_get(&v, "data.missing"), None);
+        assert_eq!(json_path_get(&v, "data.items.9"), None);
+    }
+
+    #[test]
+    fn json_path_equals_assertion() {
+        let out = raw("{\"status\":\"ok\",\"count\":3}");
+        assert!(evaluate_assertion(
+            &Assertion::JsonPathEquals { path: "status".into(), value: json!("ok") },
+            &out
+        )
+        .passed);
+        assert!(!evaluate_assertion(
+            &Assertion::JsonPathEquals { path: "count".into(), value: json!(5) },
+            &out
+        )
+        .passed);
+    }
+
+    #[test]
+    fn parse_judge_result_is_lenient() {
+        // Float score is rounded and clamped; pass/summary parsed.
+        let j = parse_judge_result(&raw("{\"score\": 87.6, \"pass\": true, \"summary\": \"good\"}"))
+            .unwrap();
+        assert_eq!(j.score, 88);
+        assert!(j.pass);
+        assert_eq!(j.summary, "good");
+        // Missing fields default safely.
+        let j2 = parse_judge_result(&raw("{\"score\": 200}")).unwrap();
+        assert_eq!(j2.score, 100);
+        assert!(!j2.pass);
+    }
+}
diff --git a/backend/windmill-api/src/lib.rs b/backend/windmill-api/src/lib.rs
index cbbb406db4fc8..12a47821fd58f 100644
--- a/backend/windmill-api/src/lib.rs
+++ b/backend/windmill-api/src/lib.rs
@@ -66,6 +66,7 @@ use crate::scim_oss::has_scim_token;
 use windmill_common::error::AppError;
 
 mod ai;
+mod ai_agents;
 mod ai_skills;
 mod apps;
 pub mod args;
@@ -621,6 +622,7 @@ pub async fn run_server(
                             Router::new()
                         })
                         .nest("/ai", ai::workspaced_service())
+                        .nest("/ai_agents", ai_agents::workspaced_service())
                         .nest("/ai_skills", ai_skills::workspaced_service())
                         .nest("/npm_proxy", windmill_api_npm_proxy::workspaced_service())
                         .nest(
diff --git a/backend/windmill-types/src/flows.rs b/backend/windmill-types/src/flows.rs
index 006185dbd9184..bed0502c80d2f 100644
--- a/backend/windmill-types/src/flows.rs
+++ b/backend/windmill-types/src/flows.rs
@@ -1000,6 +1000,11 @@ pub enum FlowModuleValue {
         tag: Option<String>,
         #[serde(default, skip_serializing_if = "is_false")]
         omit_output_from_conversation: bool,
+        /// When set, the agent brain config (provider/model/system prompt/etc.) and tools are
+        /// resolved at runtime from this `ai_agent` resource path (hybrid linking). The module's
+        /// `input_transforms` then only carry the flow-local inputs (user_message/user_attachments).
+        #[serde(default, skip_serializing_if = "Option::is_none")]
+        agent: Option<String>,
     },
 }
 
@@ -1035,6 +1040,7 @@ struct UntaggedFlowModuleValue {
     assets: Option<Vec<AssetWithAltAccessType>>,
     tools: Option<Vec<AgentTool>>,
     omit_output_from_conversation: Option<bool>,
+    agent: Option<String>,
     pass_flow_input_directly: Option<bool>,
     squash: Option<bool>,
     #[serde(flatten)]
@@ -1133,13 +1139,14 @@ impl<'de> Deserialize<'de> for FlowModuleValue {
             "identity" => Ok(FlowModuleValue::Identity),
             "aiagent" => Ok(FlowModuleValue::AIAgent {
                 input_transforms: untagged.input_transforms.unwrap_or_default(),
-                tools: untagged
-                    .tools
-                    .ok_or_else(|| serde::de::Error::missing_field("tools"))?,
+                // Tools default to empty: a linked agent (see `agent`) resolves its tools from
+                // the referenced resource, so the module itself may carry none.
+                tools: untagged.tools.unwrap_or_default(),
                 tag: untagged.tag,
                 omit_output_from_conversation: untagged
                     .omit_output_from_conversation
                     .unwrap_or(false),
+                agent: untagged.agent,
             }),
             other => Err(serde::de::Error::unknown_variant(
                 other,
diff --git a/backend/windmill-worker/src/ai_executor.rs b/backend/windmill-worker/src/ai_executor.rs
index 2591cc649527a..5e6900fd7e713 100644
--- a/backend/windmill-worker/src/ai_executor.rs
+++ b/backend/windmill-worker/src/ai_executor.rs
@@ -35,7 +35,7 @@ use windmill_common::{
     error::{self, Error},
     flow_conversations::MessageType,
     flow_status::AgentAction,
-    flows::{FlowModule, FlowModuleValue, ToolValue},
+    flows::{AgentTool, FlowModule, FlowModuleValue, ToolValue},
     get_latest_hash_for_path,
     jobs::JobKind,
     scripts::get_full_hub_script_by_path,
@@ -156,14 +156,20 @@ pub async fn handle_ai_agent_job(
     has_stream: &mut bool,
 ) -> Result<Box<RawValue>, Error> {
     // build_args_map returns None if no $res:/$var: transforms needed, in which case use original args
-    let args = match build_args_map(job, client, conn).await? {
+    let local_args = match build_args_map(job, client, conn).await? {
         Some(transformed) => transformed,
         None => job.args.as_ref().map(|a| a.0.clone()).unwrap_or_default(),
     };
-    let args = serde_json::from_str::<AIAgentArgs>(&serde_json::to_string(&args)?)?;
 
-    // Handle dry_run mode - check credentials without making API calls
-    if args.credentials_check {
+    // Handle dry_run mode - check credentials without making API calls.
+    // The credentials check is always invoked inline (provider present, no agent link and no
+    // parent flow), so it resolves before any flow/agent-resource context is fetched.
+    let is_credentials_check = local_args
+        .get("credentials_check")
+        .map(|v| v.get().trim() == "true")
+        .unwrap_or(false);
+    if is_credentials_check {
+        let args = serde_json::from_str::<AIAgentArgs>(&serde_json::to_string(&local_args)?)?;
         return handle_credentials_check(&args.provider).await;
     }
 
@@ -249,14 +255,75 @@ pub async fn handle_ai_agent_job(
 
     let summary = module.summary.clone();
 
-    let FlowModuleValue::AIAgent { tools, omit_output_from_conversation, .. } =
-        module.get_value()?
+    let FlowModuleValue::AIAgent {
+        tools: module_tools, omit_output_from_conversation, agent, ..
+    } = module.get_value()?
     else {
         return Err(Error::internal_err(
             "AI agent module is not an AI agent".to_string(),
         ));
     };
 
+    // Hybrid linking: when the step references a saved `ai_agent` resource, the brain config
+    // (provider/model/system prompt/etc.) and the tool set come from that resource; only the
+    // flow-local inputs (user_message/user_attachments) come from the step. When not linked, the
+    // brain is the step's resolved input_transforms and the tools are the module's own.
+    // v1 boundary: a linked agent's tools come wholly from the resource; per-tool flow-context
+    // wiring is a fast-follow.
+    let (args, tools): (AIAgentArgs, Vec<AgentTool>) = if let Some(agent_ref) = agent.as_deref() {
+        let agent_path = agent_ref
+            .trim_start_matches("$res:")
+            .trim_start_matches("res://");
+        let resource_value = client
+            .get_resource_value_interpolated::<serde_json::Value>(
+                agent_path,
+                Some(job.id.to_string()),
+            )
+            .await
+            .map_err(|e| {
+                Error::internal_err(format!(
+                    "failed to load ai_agent resource {agent_path}: {e}"
+                ))
+            })?;
+        let mut config = match resource_value {
+            serde_json::Value::Object(map) => map,
+            _ => {
+                return Err(Error::internal_err(format!(
+                    "ai_agent resource {agent_path} must be a JSON object"
+                )))
+            }
+        };
+        // Overlay flow-local inputs onto the agent brain.
+        for key in ["user_message", "user_attachments"] {
+            if let Some(v) = local_args.get(key) {
+                config.insert(
+                    key.to_string(),
+                    serde_json::from_str(v.get()).unwrap_or(serde_json::Value::Null),
+                );
+            }
+        }
+        // A linked agent is rigid: brain and tools come wholly from the resource. To diverge, the
+        // step must be unlinked (forked), at which point the config is copied into the step.
+        let tools = match config.remove("tools") {
+            Some(t) => serde_json::from_value::<Vec<AgentTool>>(t).map_err(|e| {
+                Error::internal_err(format!(
+                    "invalid tools in ai_agent resource {agent_path}: {e}"
+                ))
+            })?,
+            None => Vec::new(),
+        };
+        let args = serde_json::from_value::<AIAgentArgs>(serde_json::Value::Object(config))
+            .map_err(|e| {
+                Error::internal_err(format!(
+                    "invalid ai_agent resource config {agent_path}: {e}"
+                ))
+            })?;
+        (args, tools)
+    } else {
+        let args = serde_json::from_str::<AIAgentArgs>(&serde_json::to_string(&local_args)?)?;
+        (args, module_tools)
+    };
+
     // Separate Windmill tools from MCP tools, websearch, and extract MCP resource configs
     let mut windmill_modules: Vec<FlowModule> = Vec::new();
     #[allow(unused_mut)]
diff --git a/backend/windmill-worker/src/worker_lockfiles.rs b/backend/windmill-worker/src/worker_lockfiles.rs
index d98a2cd2e06f8..e63d2260dfa8a 100644
--- a/backend/windmill-worker/src/worker_lockfiles.rs
+++ b/backend/windmill-worker/src/worker_lockfiles.rs
@@ -1250,6 +1250,7 @@ async fn lock_modules(
                     mut tools,
                     tag,
                     omit_output_from_conversation,
+                    agent,
                 } => {
                     // Extract FlowModules from tools and track their original indices
                     // MCP tools don't need locking, so we filter them out
@@ -1302,6 +1303,7 @@ async fn lock_modules(
                         tools,
                         tag,
                         omit_output_from_conversation,
+                        agent,
                     }
                     .into();
                 }
diff --git a/docs/reusable-ai-agents.md b/docs/reusable-ai-agents.md
new file mode 100644
index 0000000000000..94b3a824d179b
--- /dev/null
+++ b/docs/reusable-ai-agents.md
@@ -0,0 +1,36 @@
+# Reusable AI Agents
+
+An AI agent flow step can be saved as a **reusable agent** — a resource of the built-in
+`ai_agent` resource type that bundles the agent's brain (provider/model, system prompt,
+temperature, output schema, memory…), its tool set, and an **eval suite**. Other flows can
+link to the same agent, and edits to the agent propagate to every linked step.
+
+## Hybrid linking
+
+`FlowModuleValue::AIAgent` has an optional `agent` field holding the resource path. When set:
+
+- The brain config and tools are resolved at runtime from the resource
+  (`windmill-worker/src/ai_executor.rs`, via `get_resource_value_interpolated` — nested
+  provider `$res:` credentials resolve automatically).
+- The flow step keeps only the flow-local inputs (`user_message`, `user_attachments`) in its
+  `input_transforms`.
+- v1 boundary: a linked agent's tools come wholly from the resource (per-tool flow-context
+  wiring is a fast-follow).
+
+In the flow editor, the AI agent step's **Step Input** tab shows a bar to *Save as agent*,
+*Use saved agent* (picker), or *Unlink* (snapshots the resolved config back into the step).
+
+## Evals
+
+A saved agent carries `evals.cases`. Each case has an input message, an optional LLM-judge
+checklist, and optional deterministic assertions (`contains` / `not_contains` / `regex` /
+`json_path_equals` / `output_schema_valid`). The **Evals** tab authors and runs them.
+
+Cases run for real (they cost tokens): the backend pushes a single-step AIAgent flow-preview
+job and grades the output. The judge itself runs as an inline AIAgent step with a structured
+output schema (`{score, pass, summary}`), defaulting to the agent's own provider.
+
+- `POST /w/{workspace}/ai_agents/run` — run a saved agent once on an input.
+- `POST /w/{workspace}/ai_agents/eval_case` — run one eval case (execute + assertions + judge).
+
+Sharing works through standard resource folder permissions (save agents under `f/...`).
diff --git a/frontend/src/lib/components/flows/agentResourceUtils.test.ts b/frontend/src/lib/components/flows/agentResourceUtils.test.ts
new file mode 100644
index 0000000000000..18963e27cb340
--- /dev/null
+++ b/frontend/src/lib/components/flows/agentResourceUtils.test.ts
@@ -0,0 +1,38 @@
+import { describe, expect, it } from 'vitest'
+
+import { summarizeAgentBrain } from './agentResourceUtils'
+
+describe('summarizeAgentBrain', () => {
+	it('returns only set fields, in brain-key order, formatted', () => {
+		const rows = summarizeAgentBrain({
+			provider: { kind: 'openai', model: 'gpt-4o', resource: '$res:f/x/openai' } as any,
+			system_prompt: 'You are helpful',
+			temperature: 0.7,
+			streaming: true,
+			max_iterations: 10
+		})
+		expect(rows).toEqual([
+			{ label: 'Model', value: 'openai · gpt-4o' },
+			{ label: 'System prompt', value: 'You are helpful' },
+			{ label: 'Streaming', value: 'on' },
+			{ label: 'Temperature', value: '0.7' },
+			{ label: 'Max iterations', value: '10' }
+		])
+	})
+
+	it('skips empty/undefined fields', () => {
+		expect(summarizeAgentBrain({ system_prompt: '', provider: undefined as any })).toEqual([])
+		expect(summarizeAgentBrain(undefined)).toEqual([])
+	})
+
+	it('summarizes structured fields compactly', () => {
+		const rows = summarizeAgentBrain({
+			memory: { type: 'auto', context_length: 20 } as any,
+			output_schema: { type: 'object' } as any
+		})
+		expect(rows).toEqual([
+			{ label: 'Memory', value: 'auto' },
+			{ label: 'Output schema', value: 'configured' }
+		])
+	})
+})
diff --git a/frontend/src/lib/components/flows/agentResourceUtils.ts b/frontend/src/lib/components/flows/agentResourceUtils.ts
new file mode 100644
index 0000000000000..d6cfc7ec8718c
--- /dev/null
+++ b/frontend/src/lib/components/flows/agentResourceUtils.ts
@@ -0,0 +1,155 @@
+import type { InputTransform } from '$lib/gen'
+
+// The brain fields stored flat in an `ai_agent` resource value. The flow-local inputs
+// (user_message/user_attachments) are intentionally excluded — they are supplied per-flow.
+export const AGENT_BRAIN_KEYS = [
+	'provider',
+	'output_type',
+	'system_prompt',
+	'streaming',
+	'memory',
+	'output_schema',
+	'max_completion_tokens',
+	'temperature',
+	'max_iterations'
+] as const
+
+export const AGENT_FLOW_LOCAL_KEYS = ['user_message', 'user_attachments'] as const
+
+export type AgentTool = Record<string, any>
+
+export type AgentAssertion =
+	| { kind: 'contains'; value: string; case_sensitive?: boolean }
+	| { kind: 'not_contains'; value: string; case_sensitive?: boolean }
+	| { kind: 'regex'; pattern: string }
+	| { kind: 'json_path_equals'; path: string; value: unknown }
+	| { kind: 'output_schema_valid' }
+
+export interface AgentEvalCase {
+	id: string
+	name?: string
+	input: { user_message?: string; user_attachments?: unknown[] }
+	judge_checklist?: string[]
+	assertions?: AgentAssertion[]
+}
+
+export interface AgentEvalSuite {
+	cases: AgentEvalCase[]
+	judge?: unknown
+}
+
+/** Brain keys whose step transform is non-static and would be dropped by a save-as-agent snapshot. */
+export function nonStaticBrainKeys(
+	inputTransforms: Record<string, InputTransform> | undefined
+): string[] {
+	return AGENT_BRAIN_KEYS.filter((key) => {
+		const t = inputTransforms?.[key] as any
+		return t && t.type !== 'static'
+	})
+}
+
+export interface AIAgentConfig {
+	provider?: unknown
+	output_type?: string
+	system_prompt?: string
+	streaming?: boolean
+	memory?: unknown
+	output_schema?: unknown
+	max_completion_tokens?: number
+	temperature?: number
+	max_iterations?: number
+	tools?: AgentTool[]
+	evals?: AgentEvalSuite
+}
+
+/** Extract the static brain values from a step's input_transforms into a flat agent config. */
+export function inputTransformsToAgentConfig(
+	inputTransforms: Record<string, InputTransform> | undefined,
+	tools: AgentTool[] | undefined,
+	evals?: AgentEvalSuite
+): AIAgentConfig {
+	const config: AIAgentConfig = { tools: tools ?? [], evals: evals ?? { cases: [] } }
+	for (const key of AGENT_BRAIN_KEYS) {
+		const t = inputTransforms?.[key] as any
+		if (t && t.type === 'static' && t.value !== undefined) {
+			;(config as any)[key] = t.value
+		}
+	}
+	return config
+}
+
+/**
+ * Reduce the AI agent schema to only the flow-local inputs. Used when a step is linked to a saved
+ * agent: the brain fields come from the resource, so only user_message/user_attachments stay editable.
+ */
+export function flowLocalAgentSchema(schema: any): any {
+	if (!schema?.properties) {
+		return schema
+	}
+	const properties: Record<string, unknown> = {}
+	for (const key of AGENT_FLOW_LOCAL_KEYS) {
+		if (schema.properties[key]) {
+			properties[key] = schema.properties[key]
+		}
+	}
+	return {
+		...schema,
+		properties,
+		order: (schema.order ?? Object.keys(properties)).filter((k: string) => k in properties),
+		required: (schema.required ?? []).filter((k: string) => k in properties)
+	}
+}
+
+const AGENT_BRAIN_LABELS: Record<string, string> = {
+	provider: 'Model',
+	output_type: 'Output type',
+	system_prompt: 'System prompt',
+	streaming: 'Streaming',
+	memory: 'Memory',
+	output_schema: 'Output schema',
+	max_completion_tokens: 'Max tokens',
+	temperature: 'Temperature',
+	max_iterations: 'Max iterations'
+}
+
+/** Flatten a saved agent's brain config into human-readable label/value rows for a read-only
+ * display on a linked step. Only set fields are returned, in the canonical brain-key order. */
+export function summarizeAgentBrain(
+	config: AIAgentConfig | undefined
+): { label: string; value: string }[] {
+	const rows: { label: string; value: string }[] = []
+	for (const key of AGENT_BRAIN_KEYS) {
+		const v = (config as any)?.[key]
+		if (v === undefined || v === null || v === '') continue
+		let value: string
+		if (key === 'provider') {
+			value = [v.kind, v.model].filter(Boolean).join(' · ') || 'configured'
+		} else if (key === 'memory') {
+			value = typeof v === 'object' ? (v.type ?? 'configured') : String(v)
+		} else if (key === 'output_schema') {
+			value = 'configured'
+		} else if (typeof v === 'boolean') {
+			value = v ? 'on' : 'off'
+		} else if (typeof v === 'object') {
+			value = JSON.stringify(v)
+		} else {
+			value = String(v)
+		}
+		rows.push({ label: AGENT_BRAIN_LABELS[key] ?? key, value })
+	}
+	return rows
+}
+
+/** Inverse: wrap brain config values as static input_transforms (used when unlinking a step). */
+export function agentConfigToInputTransforms(
+	config: AIAgentConfig
+): Record<string, InputTransform> {
+	const it: Record<string, InputTransform> = {}
+	for (const key of AGENT_BRAIN_KEYS) {
+		const v = (config as any)[key]
+		if (v !== undefined) {
+			it[key] = { type: 'static', value: v } as InputTransform
+		}
+	}
+	return it
+}
diff --git a/frontend/src/lib/components/flows/content/AgentEvalsPanel.svelte b/frontend/src/lib/components/flows/content/AgentEvalsPanel.svelte
new file mode 100644
index 0000000000000..c426994823470
--- /dev/null
+++ b/frontend/src/lib/components/flows/content/AgentEvalsPanel.svelte
@@ -0,0 +1,369 @@
+<script lang="ts">
+	import { Button } from '$lib/components/common'
+	import Select from '$lib/components/select/Select.svelte'
+	import { AiAgentService, ResourceService } from '$lib/gen'
+	import { workspaceStore } from '$lib/stores'
+	import { sendUserToast } from '$lib/toast'
+	import { randomUUID } from '$lib/utils/uuid'
+	import { Play, Plus, Save, Trash2, Loader2, Check, X, ChevronDown, ChevronRight } from 'lucide-svelte'
+	import type {
+		AgentAssertion,
+		AgentEvalCase,
+		AIAgentConfig
+	} from '../agentResourceUtils'
+
+	let { agent }: { agent: string } = $props()
+
+	type AssertionResult = { assertion: AgentAssertion; passed: boolean; detail?: string }
+	type JudgeResult = { score: number; pass: boolean; summary: string }
+	type CaseResult = {
+		case_id: string
+		passed: boolean
+		output?: unknown
+		error?: string
+		assertions?: AssertionResult[]
+		judge?: JudgeResult
+		latency_ms: number
+	}
+
+	let resourceValue: AIAgentConfig = $state({})
+	let cases: AgentEvalCase[] = $state([])
+	let results: Record<string, CaseResult | 'running'> = $state({})
+	let expanded: Record<string, boolean> = $state({})
+	let loading = $state(true)
+	let saving = $state(false)
+
+	const ASSERTION_KINDS = [
+		{ label: 'contains', value: 'contains' },
+		{ label: 'does not contain', value: 'not_contains' },
+		{ label: 'matches regex', value: 'regex' },
+		{ label: 'json path equals', value: 'json_path_equals' },
+		{ label: 'valid output schema', value: 'output_schema_valid' }
+	]
+
+	async function load() {
+		if (!$workspaceStore) return
+		loading = true
+		try {
+			const res = await ResourceService.getResource({ workspace: $workspaceStore, path: agent })
+			resourceValue = (res.value ?? {}) as AIAgentConfig
+			cases = resourceValue.evals?.cases ?? []
+		} catch (e) {
+			sendUserToast(`Failed to load agent: ${e}`, true)
+		} finally {
+			loading = false
+		}
+	}
+
+	$effect(() => {
+		agent && $workspaceStore && load()
+	})
+
+	function addCase() {
+		cases = [
+			...cases,
+			{ id: randomUUID(), name: `Case ${cases.length + 1}`, input: { user_message: '' }, judge_checklist: [], assertions: [] }
+		]
+	}
+
+	function removeCase(id: string) {
+		cases = cases.filter((c) => c.id !== id)
+		delete results[id]
+	}
+
+	function checklistText(c: AgentEvalCase): string {
+		return (c.judge_checklist ?? []).join('\n')
+	}
+	function setChecklist(c: AgentEvalCase, text: string) {
+		c.judge_checklist = text
+			.split('\n')
+			.map((l) => l.trim())
+			.filter(Boolean)
+	}
+
+	let newAssertionKind: Record<string, string> = $state({})
+	function addAssertion(c: AgentEvalCase) {
+		const kind = newAssertionKind[c.id] ?? 'contains'
+		let a: AgentAssertion
+		if (kind === 'regex') a = { kind: 'regex', pattern: '' }
+		else if (kind === 'json_path_equals') a = { kind: 'json_path_equals', path: '', value: '' }
+		else if (kind === 'output_schema_valid') a = { kind: 'output_schema_valid' }
+		else if (kind === 'not_contains') a = { kind: 'not_contains', value: '' }
+		else a = { kind: 'contains', value: '' }
+		c.assertions = [...(c.assertions ?? []), a]
+	}
+	function removeAssertion(c: AgentEvalCase, i: number) {
+		c.assertions = (c.assertions ?? []).filter((_, idx) => idx !== i)
+	}
+
+	async function save() {
+		if (!$workspaceStore) return
+		saving = true
+		try {
+			const value = { ...resourceValue, evals: { ...(resourceValue.evals ?? {}), cases } }
+			await ResourceService.updateResource({
+				workspace: $workspaceStore,
+				path: agent,
+				requestBody: { value }
+			})
+			resourceValue = value
+			sendUserToast('Saved eval cases')
+		} catch (e) {
+			sendUserToast(`Failed to save: ${e}`, true)
+		} finally {
+			saving = false
+		}
+	}
+
+	async function runCase(c: AgentEvalCase) {
+		if (!$workspaceStore) return
+		results[c.id] = 'running'
+		try {
+			const resp = await AiAgentService.evalAiAgentCase({
+				workspace: $workspaceStore,
+				requestBody: { agent, case: c as unknown as Record<string, unknown> }
+			})
+			results[c.id] = resp as unknown as CaseResult
+		} catch (e) {
+			results[c.id] = {
+				case_id: c.id,
+				passed: false,
+				error: String(e),
+				latency_ms: 0
+			}
+		}
+	}
+
+	async function runAll() {
+		// Save first so the linked agent reflects the latest cases, then run sequentially.
+		await save()
+		for (const c of cases) {
+			await runCase(c)
+		}
+	}
+
+	function outputText(output: unknown): string {
+		if (typeof output === 'string') return output
+		return JSON.stringify(output, null, 2)
+	}
+
+	let passedCount = $derived(
+		cases.filter((c) => {
+			const r = results[c.id]
+			return r && r !== 'running' && r.passed
+		}).length
+	)
+	let ranCount = $derived(
+		cases.filter((c) => {
+			const r = results[c.id]
+			return r && r !== 'running'
+		}).length
+	)
+</script>
+
+<div class="flex flex-col gap-3 p-3">
+	<div class="flex items-center gap-2">
+		<span class="text-sm font-semibold">Evals</span>
+		{#if ranCount > 0}
+			<span class="text-xs text-secondary">{passedCount}/{ranCount} passed</span>
+		{/if}
+		<div class="ml-auto flex items-center gap-1">
+			<Button size="xs2" variant="default" startIcon={{ icon: Plus }} onclick={addCase}>
+				Add case
+			</Button>
+			<Button
+				size="xs2"
+				variant="default"
+				startIcon={{ icon: Save }}
+				disabled={saving}
+				onclick={save}
+			>
+				Save
+			</Button>
+			<Button
+				size="xs2"
+				variant="accent"
+				startIcon={{ icon: Play }}
+				disabled={cases.length === 0}
+				onclick={runAll}
+			>
+				Run all
+			</Button>
+		</div>
+	</div>
+
+	{#if loading}
+		<div class="flex items-center gap-2 text-xs text-secondary">
+			<Loader2 size={14} class="animate-spin" /> Loading…
+		</div>
+	{:else if cases.length === 0}
+		<p class="text-xs text-secondary">
+			No eval cases yet. Add a case with an input message, then grade it with a judge checklist
+			and/or deterministic assertions.
+		</p>
+	{:else}
+		{#each cases as c (c.id)}
+			{@const r = results[c.id]}
+			<div class="rounded-md border border-border p-3 flex flex-col gap-2">
+				<div class="flex items-center gap-2">
+					<input
+						class="text-xs font-medium grow border border-border-light rounded px-2 py-1 bg-surface-input"
+						bind:value={c.name}
+						placeholder="Case name"
+					/>
+					{#if r === 'running'}
+						<Loader2 size={14} class="animate-spin text-secondary" />
+					{:else if r}
+						{#if r.passed}
+							<span class="flex items-center gap-1 text-xs text-green-600">
+								<Check size={14} /> Pass
+							</span>
+						{:else}
+							<span class="flex items-center gap-1 text-xs text-red-600">
+								<X size={14} /> Fail
+							</span>
+						{/if}
+						{#if r.judge}
+							<span class="text-2xs text-secondary">judge {r.judge.score}</span>
+						{/if}
+						<span class="text-2xs text-tertiary">{r.latency_ms}ms</span>
+					{/if}
+					<Button
+						size="xs2"
+						variant="default"
+						startIcon={{ icon: Play }}
+						disabled={r === 'running'}
+						onclick={() => runCase(c)}
+					>
+						Run
+					</Button>
+					<Button
+						size="xs2"
+						variant="default"
+						iconOnly
+						startIcon={{ icon: Trash2 }}
+						onclick={() => removeCase(c.id)}
+					/>
+				</div>
+
+				<label class="flex flex-col gap-1 text-2xs text-secondary">
+					Input message
+					<textarea
+						class="text-xs border border-border-light rounded px-2 py-1 bg-surface-input"
+						rows="2"
+						bind:value={c.input.user_message}
+						placeholder="The user message to send to the agent"
+					></textarea>
+				</label>
+
+				<label class="flex flex-col gap-1 text-2xs text-secondary">
+					Judge checklist (one criterion per line)
+					<textarea
+						class="text-xs border border-border-light rounded px-2 py-1 bg-surface-input"
+						rows="2"
+						value={checklistText(c)}
+						oninput={(e) => setChecklist(c, e.currentTarget.value)}
+						placeholder="e.g. Mentions the order id&#10;Tone is professional"
+					></textarea>
+				</label>
+
+				<div class="flex flex-col gap-1">
+					<span class="text-2xs text-secondary">Assertions</span>
+					{#each c.assertions ?? [] as a, i (i)}
+						<div class="flex items-center gap-1 text-xs">
+							<span class="text-2xs text-tertiary w-28 shrink-0">{a.kind}</span>
+							{#if a.kind === 'contains' || a.kind === 'not_contains'}
+								<input
+									class="text-xs grow border border-border-light rounded px-2 py-0.5 bg-surface-input"
+									bind:value={a.value}
+									placeholder="substring"
+								/>
+							{:else if a.kind === 'regex'}
+								<input
+									class="text-xs grow border border-border-light rounded px-2 py-0.5 bg-surface-input"
+									bind:value={a.pattern}
+									placeholder="pattern"
+								/>
+							{:else if a.kind === 'json_path_equals'}
+								<input
+									class="text-xs w-32 border border-border-light rounded px-2 py-0.5 bg-surface-input"
+									bind:value={a.path}
+									placeholder="a.b.0"
+								/>
+								<input
+									class="text-xs grow border border-border-light rounded px-2 py-0.5 bg-surface-input"
+									value={typeof a.value === 'string' ? a.value : JSON.stringify(a.value)}
+									oninput={(e) => {
+										try {
+											a.value = JSON.parse(e.currentTarget.value)
+										} catch {
+											a.value = e.currentTarget.value
+										}
+									}}
+									placeholder="expected value"
+								/>
+							{:else}
+								<span class="text-2xs text-tertiary grow">output is valid non-null JSON</span>
+							{/if}
+							<Button
+								size="xs3"
+								variant="default"
+								iconOnly
+								startIcon={{ icon: X }}
+								onclick={() => removeAssertion(c, i)}
+							/>
+						</div>
+					{/each}
+					<div class="flex items-center gap-1">
+						<div class="w-40">
+							<Select
+								items={ASSERTION_KINDS}
+								bind:value={newAssertionKind[c.id]}
+								placeholder="contains"
+							/>
+						</div>
+						<Button
+							size="xs3"
+							variant="default"
+							startIcon={{ icon: Plus }}
+							onclick={() => addAssertion(c)}
+						>
+							Add assertion
+						</Button>
+					</div>
+				</div>
+
+				{#if r && r !== 'running'}
+					<div class="border-t border-border-light pt-2">
+						<button
+							class="flex items-center gap-1 text-2xs text-secondary"
+							onclick={() => (expanded[c.id] = !expanded[c.id])}
+						>
+							{#if expanded[c.id]}<ChevronDown size={12} />{:else}<ChevronRight size={12} />{/if}
+							Result
+						</button>
+						{#if expanded[c.id]}
+							<div class="mt-1 flex flex-col gap-1 text-2xs">
+								{#if r.error}
+									<div class="text-red-600">{r.error}</div>
+								{/if}
+								{#if r.judge}
+									<div><span class="text-tertiary">Judge:</span> {r.judge.summary}</div>
+								{/if}
+								{#each r.assertions ?? [] as ar, ai (ai)}
+									<div class={ar.passed ? 'text-green-600' : 'text-red-600'}>
+										{ar.passed ? '✓' : '✗'} {ar.assertion.kind}
+										{#if ar.detail}<span class="text-tertiary"> — {ar.detail}</span>{/if}
+									</div>
+								{/each}
+								{#if r.output !== undefined}
+									<pre class="bg-surface-secondary rounded p-2 overflow-auto max-h-48 whitespace-pre-wrap">{outputText(r.output)}</pre>
+								{/if}
+							</div>
+						{/if}
+					</div>
+				{/if}
+			</div>
+		{/each}
+	{/if}
+</div>
diff --git a/frontend/src/lib/components/flows/content/AgentResourceBar.svelte b/frontend/src/lib/components/flows/content/AgentResourceBar.svelte
new file mode 100644
index 0000000000000..72a487880bb9c
--- /dev/null
+++ b/frontend/src/lib/components/flows/content/AgentResourceBar.svelte
@@ -0,0 +1,362 @@
+<script lang="ts">
+	import { Button, Drawer, DrawerContent } from '$lib/components/common'
+	import Path from '$lib/components/Path.svelte'
+	import LightweightResourcePicker from '$lib/components/LightweightResourcePicker.svelte'
+	import { ResourceService, type InputTransform } from '$lib/gen'
+	import { workspaceStore } from '$lib/stores'
+	import { sendUserToast } from '$lib/toast'
+	import { Bot, Save, Unlink, Pencil, TriangleAlert } from 'lucide-svelte'
+	import {
+		AGENT_FLOW_LOCAL_KEYS,
+		agentConfigToInputTransforms,
+		inputTransformsToAgentConfig,
+		nonStaticBrainKeys,
+		summarizeAgentBrain,
+		type AIAgentConfig,
+		type AgentTool
+	} from '../agentResourceUtils'
+	import { resource } from 'runed'
+
+	let {
+		agent = $bindable(),
+		inputTransforms = $bindable(),
+		tools = $bindable()
+	}: {
+		agent: string | undefined
+		inputTransforms: Record<string, InputTransform>
+		tools: AgentTool[]
+	} = $props()
+
+	let saveDrawer: Drawer | undefined = $state()
+	let newPath = $state('')
+	let pathError = $state('')
+	let description = $state('')
+	let saving = $state(false)
+	let pickerValue: string | undefined = $state(undefined)
+	// Set when the step was forked from a saved agent for editing: it's now an editable standalone
+	// step, but "Save changes" upserts back to this path (and re-links), propagating to all flows.
+	let editingPath: string | undefined = $state(undefined)
+
+	type LinkedInfo = {
+		config: AIAgentConfig
+		tools: AgentTool[]
+		providerPath?: string
+		providerOk: boolean
+	}
+
+	// A linked agent is rigid and read-only: its brain, tools and evals come from the resource. We
+	// load them here for display, and probe the provider resource so we can warn when it isn't
+	// accessible in this workspace (the user then needs to unlink/fork or gain access).
+	let linkedResource = resource(
+		() => ({ ws: $workspaceStore, path: agent }),
+		async ({ ws, path }): Promise<LinkedInfo> => {
+			if (!ws || !path) {
+				return { config: {}, tools: [], providerOk: true }
+			}
+			const res = await ResourceService.getResource({ workspace: ws, path })
+			const cfg = (res.value ?? {}) as AIAgentConfig & { provider?: { resource?: string } }
+			const tools = (cfg.tools ?? []) as AgentTool[]
+			const providerRef = cfg.provider?.resource
+			const providerPath =
+				typeof providerRef === 'string' && providerRef
+					? providerRef.replace(/^\$res:/, '').replace(/^res:\/\//, '')
+					: undefined
+			let providerOk = true
+			if (providerPath) {
+				try {
+					await ResourceService.getResource({ workspace: ws, path: providerPath })
+				} catch {
+					providerOk = false
+				}
+			}
+			return { config: cfg, tools, providerPath, providerOk }
+		}
+	)
+	let inheritedTools = $derived(linkedResource.current?.tools ?? [])
+	let brainParams = $derived(summarizeAgentBrain(linkedResource.current?.config))
+	let providerPath = $derived(linkedResource.current?.providerPath)
+	let providerOk = $derived(linkedResource.current?.providerOk ?? true)
+
+	// Link the step as soon as a saved agent is picked. Linking is rigid, so the step keeps no tools
+	// of its own — they come from the resource.
+	$effect(() => {
+		if (pickerValue && pickerValue !== agent) {
+			agent = pickerValue
+			tools = []
+		}
+	})
+
+	function toolLabel(tool: AgentTool): string {
+		return tool.summary || tool.value?.tool_type || tool.id
+	}
+
+	function openSave() {
+		newPath = editingPath ?? ''
+		pathError = ''
+		description = ''
+		saveDrawer?.openDrawer()
+	}
+
+	// Create or update the `ai_agent` resource at `path` from the step's current brain + tools, then
+	// link the step to it. Updating preserves the resource's existing eval suite (evals are edited
+	// in place on the linked step's Evals tab, not here).
+	async function persist(path: string, description?: string) {
+		const dropped = nonStaticBrainKeys(inputTransforms)
+		if (dropped.length > 0) {
+			sendUserToast(
+				`Note: ${dropped.join(', ')} use a computed/connected value and won't be saved into the agent`,
+				true
+			)
+		}
+		const value = inputTransformsToAgentConfig(inputTransforms, tools)
+		const exists = await ResourceService.existsResource({ workspace: $workspaceStore!, path })
+		if (exists) {
+			const cur = await ResourceService.getResource({ workspace: $workspaceStore!, path })
+			const curEvals = (cur.value as AIAgentConfig | undefined)?.evals
+			if (curEvals) {
+				value.evals = curEvals
+			}
+			await ResourceService.updateResourceValue({
+				workspace: $workspaceStore!,
+				path,
+				requestBody: { value }
+			})
+		} else {
+			await ResourceService.createResource({
+				workspace: $workspaceStore!,
+				requestBody: {
+					path,
+					value,
+					resource_type: 'ai_agent',
+					description: description || 'Reusable AI agent'
+				}
+			})
+		}
+		agent = path
+		// The brain + tools now live in the resource; a linked step keeps none of its own.
+		tools = []
+		editingPath = undefined
+	}
+
+	async function saveAsAgent() {
+		if (!$workspaceStore || pathError || !newPath) {
+			return
+		}
+		saving = true
+		try {
+			const updating = newPath === editingPath
+			await persist(newPath, description)
+			saveDrawer?.closeDrawer()
+			sendUserToast(updating ? `Updated agent ${newPath}` : `Saved reusable agent ${newPath}`)
+		} catch (e) {
+			sendUserToast(`Failed to save agent: ${e}`, true)
+		} finally {
+			saving = false
+		}
+	}
+
+	// Save the forked-for-edit step back to the agent it came from, updating it in place.
+	async function saveChanges() {
+		if (!$workspaceStore || !editingPath) {
+			return
+		}
+		saving = true
+		const path = editingPath
+		try {
+			await persist(path)
+			sendUserToast(`Updated agent ${path}`)
+		} catch (e) {
+			sendUserToast(`Failed to update agent: ${e}`, true)
+		} finally {
+			saving = false
+		}
+	}
+
+	// Copy the linked resource's brain + tools into the step, turning it back into a standalone
+	// editable agent. Shared by Unlink (diverge here) and Edit (edit the saved agent itself).
+	async function forkFromResource(): Promise<string | undefined> {
+		if (!$workspaceStore || !agent) {
+			return undefined
+		}
+		const path = agent
+		const res = await ResourceService.getResource({ workspace: $workspaceStore, path })
+		const cfg = (res.value ?? {}) as AIAgentConfig
+		const brain = agentConfigToInputTransforms(cfg)
+		// Preserve the flow-local inputs already wired in the step.
+		const local: Record<string, InputTransform> = {}
+		for (const key of AGENT_FLOW_LOCAL_KEYS) {
+			if (inputTransforms?.[key]) {
+				local[key] = inputTransforms[key]
+			}
+		}
+		inputTransforms = { ...brain, ...local }
+		tools = cfg.tools ?? []
+		agent = undefined
+		pickerValue = undefined
+		return path
+	}
+
+	// Unlink forks the agent into this step so it can diverge here. It does not write back.
+	async function unlink() {
+		try {
+			const path = await forkFromResource()
+			if (path) {
+				editingPath = undefined
+				sendUserToast('Forked agent — its configuration was copied into this step')
+			}
+		} catch (e) {
+			sendUserToast(`Failed to unlink agent: ${e}`, true)
+		}
+	}
+
+	// Edit the saved agent itself: fork it into the step for editing, remembering the path so
+	// "Save changes" writes back to it (updating every flow that links to it).
+	async function editAgent() {
+		try {
+			const path = await forkFromResource()
+			if (path) {
+				editingPath = path
+				sendUserToast(`Editing ${path} — make changes, then Save changes to update it`)
+			}
+		} catch (e) {
+			sendUserToast(`Failed to edit agent: ${e}`, true)
+		}
+	}
+</script>
+
+<div class="px-2 xl:px-4 pt-2">
+	{#if agent}
+		<div
+			class="flex items-center gap-2 rounded-md border border-border bg-surface-secondary px-3 py-2 text-xs"
+		>
+			<Bot size={16} class="text-primary shrink-0" />
+			<span class="text-secondary">Linked to</span>
+			<a class="font-medium truncate" href={`/resources?path=${agent}`} title={agent}>{agent}</a>
+			<div class="ml-auto flex items-center gap-1">
+				<Button size="xs2" variant="default" startIcon={{ icon: Pencil }} onclick={editAgent}>
+					Edit
+				</Button>
+				<Button size="xs2" variant="default" startIcon={{ icon: Unlink }} onclick={unlink}>
+					Unlink
+				</Button>
+			</div>
+		</div>
+		<p class="text-2xs text-tertiary mt-1">
+			The configuration below comes from this saved agent and is read-only. Only the message/inputs
+			are set in this flow. <span class="font-medium">Edit</span> to change the agent everywhere it's
+			used, or Unlink to fork an editable copy into just this step.
+		</p>
+		{#if brainParams.length > 0 || inheritedTools.length > 0}
+			<div class="mt-1 rounded-md border border-border bg-surface-secondary px-3 py-2">
+				<div class="text-2xs font-medium text-tertiary uppercase tracking-wide">
+					From saved agent · read-only
+				</div>
+				<dl class="mt-1 flex flex-col gap-1">
+					{#each brainParams as param (param.label)}
+						<div class="flex items-baseline gap-2 text-2xs">
+							<dt class="text-tertiary shrink-0 w-28">{param.label}</dt>
+							<dd class="text-secondary truncate" title={param.value}>{param.value}</dd>
+						</div>
+					{/each}
+					{#if inheritedTools.length > 0}
+						<div class="flex items-baseline gap-2 text-2xs">
+							<dt class="text-tertiary shrink-0 w-28">Tools</dt>
+							<dd class="flex flex-wrap gap-1">
+								{#each inheritedTools as tool (tool.id)}
+									<span
+										class="inline-flex items-center rounded border border-border bg-surface px-1.5 py-0.5 text-secondary"
+										title={tool.id}
+									>
+										{toolLabel(tool)}
+									</span>
+								{/each}
+							</dd>
+						</div>
+					{/if}
+				</dl>
+			</div>
+		{/if}
+		{#if !providerOk}
+			<div
+				class="mt-1 flex items-start gap-2 rounded-md border border-red-300 bg-red-50 px-3 py-2 text-2xs text-red-700 dark:border-red-800 dark:bg-red-900/30 dark:text-red-300"
+			>
+				<TriangleAlert size={14} class="mt-0.5 shrink-0" />
+				<span>
+					This agent's model provider{#if providerPath}
+						(<span class="font-medium">{providerPath}</span>){/if} isn't accessible in this workspace.
+					Unlink to fork the agent, or gain access to the provider resource{#if providerPath}
+						at <span class="font-medium">{providerPath}</span>{/if}.
+				</span>
+			</div>
+		{/if}
+	{:else if editingPath}
+		<div
+			class="flex items-center gap-2 rounded-md border border-border bg-surface-secondary px-3 py-2 text-xs"
+		>
+			<Pencil size={16} class="text-primary shrink-0" />
+			<span class="text-secondary">Editing</span>
+			<span class="font-medium truncate" title={editingPath}>{editingPath}</span>
+			<div class="ml-auto flex items-center gap-1">
+				<Button
+					size="xs2"
+					variant="accent"
+					startIcon={{ icon: Save }}
+					disabled={saving}
+					onclick={saveChanges}
+				>
+					Save changes
+				</Button>
+				<Button size="xs2" variant="default" onclick={() => (editingPath = undefined)}>
+					Cancel
+				</Button>
+			</div>
+		</div>
+		<p class="text-2xs text-tertiary mt-1">
+			Editing the saved agent. Save changes updates it and re-links this step — the update
+			propagates to every flow that links to it. Cancel keeps your edits here as a standalone step
+			instead.
+		</p>
+	{:else}
+		<div class="flex items-center gap-2">
+			<div class="grow min-w-0">
+				<LightweightResourcePicker bind:value={pickerValue} resourceType="ai_agent" />
+			</div>
+			<span class="text-2xs text-tertiary">or</span>
+			<Button size="xs2" variant="default" startIcon={{ icon: Save }} onclick={openSave}>
+				Save as agent
+			</Button>
+		</div>
+	{/if}
+</div>
+
+<Drawer bind:this={saveDrawer} size="600px">
+	<DrawerContent title="Save as reusable agent" on:close={() => saveDrawer?.closeDrawer()}>
+		<div class="flex flex-col gap-4">
+			<p class="text-xs text-secondary">
+				Save this AI agent's configuration and tools as a reusable resource. Other flows can then
+				link to it, and updates propagate automatically.
+			</p>
+			<Path
+				bind:path={newPath}
+				bind:error={pathError}
+				initialPath=""
+				namePlaceholder="my_agent"
+				kind="resource"
+			/>
+			<label class="flex flex-col gap-1 text-xs">
+				<span class="text-secondary">Description</span>
+				<input class="text-xs" bind:value={description} placeholder="What this agent does" />
+			</label>
+		</div>
+		{#snippet actions()}
+			<Button
+				variant="accent"
+				startIcon={{ icon: Save }}
+				disabled={!newPath || !!pathError || saving}
+				onclick={saveAsAgent}
+			>
+				Save agent
+			</Button>
+		{/snippet}
+	</DrawerContent>
+</Drawer>
diff --git a/frontend/src/lib/components/flows/content/FlowModuleComponent.svelte b/frontend/src/lib/components/flows/content/FlowModuleComponent.svelte
index 46a5c78d329d9..2fcf6fbce2b77 100644
--- a/frontend/src/lib/components/flows/content/FlowModuleComponent.svelte
+++ b/frontend/src/lib/components/flows/content/FlowModuleComponent.svelte
@@ -36,6 +36,9 @@
 	import FlowModuleSleep from './FlowModuleSleep.svelte'
 	import FlowPathViewer from './FlowPathViewer.svelte'
 	import InputTransformSchemaForm from '$lib/components/InputTransformSchemaForm.svelte'
+	import AgentResourceBar from './AgentResourceBar.svelte'
+	import AgentEvalsPanel from './AgentEvalsPanel.svelte'
+	import { flowLocalAgentSchema } from '../agentResourceUtils'
 	import FlowModuleMockTransitionMessage from './FlowModuleMockTransitionMessage.svelte'
 	import Tooltip from '$lib/components/Tooltip.svelte'
 	import { SecondsInput } from '$lib/components/common'
@@ -164,6 +167,9 @@
 			flowModule.value.type === 'aiagent'
 	)
 	let visibleSelected = $derived(selected === 'chat' && !canShowChatTab ? 'inputs' : selected)
+	let agentLinked = $derived(
+		flowModule.value.type === 'aiagent' && Boolean((flowModule.value as any).agent)
+	)
 	let advancedSelected = $state('retries')
 	let advancedRuntimeSelected = $state('concurrency')
 	let s3Kind = $state('s3_client')
@@ -1028,12 +1034,43 @@
 												label="Chat"
 											/>
 										{/if}
+										{#if flowModule.value.type === 'aiagent' && !isAgentTool}
+											<Tab value="evals" label="Evals" active={agentLinked} />
+										{/if}
 										{#if !preprocessorModule && !isAgentTool}
 											<Tab value="advanced" label="Advanced" />
 										{/if}
 									</Tabs>
 									{#if visibleSelected === 'inputs' && (flowModule.value.type == 'rawscript' || flowModule.value.type == 'script' || flowModule.value.type == 'flow' || flowModule.value.type == 'aiagent')}
 										<div class="flex-1 overflow-auto" id="flow-editor-step-input">
+											{#if flowModule.value.type === 'aiagent'}
+												<AgentResourceBar
+													bind:agent={
+														() => (flowModule.value as any).agent,
+														(v) => {
+															if (flowModule.value.type === 'aiagent') {
+																;(flowModule.value as any).agent = v
+															}
+														}
+													}
+													bind:inputTransforms={
+														() => (flowModule.value as any).input_transforms,
+														(v) => {
+															if (flowModule.value.type === 'aiagent') {
+																;(flowModule.value as any).input_transforms = v
+															}
+														}
+													}
+													bind:tools={
+														() => (flowModule.value as any).tools ?? [],
+														(v) => {
+															if (flowModule.value.type === 'aiagent') {
+																;(flowModule.value as any).tools = v
+															}
+														}
+													}
+												/>
+											{/if}
 											<PropPickerWrapper
 												pickableProperties={stepPropPicker.pickableProperties}
 												error={failureModule}
@@ -1049,7 +1086,9 @@
 													class="px-2 xl:px-4 pb-8"
 													bind:this={inputTransformSchemaForm}
 													pickableProperties={stepPropPicker.pickableProperties}
-													schema={flowStateStore.val[selectedId]?.schema ?? {}}
+													schema={agentLinked
+														? flowLocalAgentSchema(flowStateStore.val[selectedId]?.schema ?? {})
+														: (flowStateStore.val[selectedId]?.schema ?? {})}
 													previousModuleId={previousModule?.id}
 													bind:args={
 														() => {
@@ -1127,6 +1166,17 @@
 												/>
 											</Section>
 										</div>
+									{:else if visibleSelected === 'evals' && flowModule.value.type === 'aiagent'}
+										<div class="flex-1 overflow-auto">
+											{#if agentLinked && (flowModule.value as any).agent}
+												<AgentEvalsPanel agent={(flowModule.value as any).agent} />
+											{:else}
+												<div class="p-4 text-xs text-secondary">
+													Save this step as a reusable agent (in the Step Input tab) to author and
+													run evals against it.
+												</div>
+											{/if}
+										</div>
 									{:else if visibleSelected === 'advanced'}
 										<Tabs bind:selected={advancedSelected} wrapperClass="shrink-0">
 											<Tab
diff --git a/frontend/src/lib/components/graph/renderers/nodes/AIToolNode.svelte b/frontend/src/lib/components/graph/renderers/nodes/AIToolNode.svelte
index 5fd064c033b82..e01f522744563 100644
--- a/frontend/src/lib/components/graph/renderers/nodes/AIToolNode.svelte
+++ b/frontend/src/lib/components/graph/renderers/nodes/AIToolNode.svelte
@@ -200,7 +200,10 @@
 			allToolEdges.push(...(toolEdges ?? []))
 			allToolNodes.push(...(toolNodes ?? []))
 
-			if (insertable) {
+			// A linked agent is rigid: its tools come from the resource and can't be edited here, so
+			// don't offer the "add tool" node (unlink/fork the step to change tools).
+			const isLinkedAgent = !!(node.data.module.value as { agent?: string }).agent
+			if (insertable && !isLinkedAgent) {
 				allToolNodes.push({
 					type: 'newAiTool',
 					data: { eventHandlers, agentModuleId: node.data.module.id },
diff --git a/openflow.openapi.yaml b/openflow.openapi.yaml
index 9ccfa3ceab7d1..b9bfedb6bc78e 100644
--- a/openflow.openapi.yaml
+++ b/openflow.openapi.yaml
@@ -1085,6 +1085,13 @@ components:
           type: boolean
           default: false
           description: If true, this AI agent step does not persist its assistant or tool messages to the flow conversation when chat mode is enabled.
+        agent:
+          type: string
+          description: |
+            Path of a reusable `ai_agent` resource (hybrid linking). When set, the agent brain
+            config (provider/model/system prompt/etc.) and tool set are resolved at runtime from
+            that resource; the module's input_transforms then only carry the flow-local inputs
+            (user_message/user_attachments).
         parallel:
           type: boolean
           description: If true, the agent can execute multiple tool calls in parallel