windmill-labs · rubenfiszel · Jun 27, 2026 · Jun 28, 2026 · Jun 28, 2026 · Jun 28, 2026
@@ -0,0 +1 @@
+DELETE FROM resource_type WHERE (workspace_id, name) = ('admins', 'ai_agent'); -- drops the `ai_agent` type row held by the `admins` workspace
@@ -0,0 +1,41 @@
+-- Built-in `ai_agent` resource type backing reusable AI agent steps.
+-- A resource of this type stores an agent's brain (provider/model/system prompt/etc.),
+-- its tool set, and its eval suite. Flow steps link to it via FlowModuleValue::AIAgent.agent.
+-- Whole-count fields (max_completion_tokens, max_iterations) are typed `integer`, not `number`.
+-- Seeded into the `admins` workspace: list_resource_types unions `workspace_id = 'admins'`,
+-- so this single row is visible from every workspace (existing and future), mirroring how
+-- hub-synced built-in types (e.g. s3object) are made globally available.
+INSERT INTO resource_type (workspace_id, name, schema, description, edited_at) VALUES
+    ('admins', 'ai_agent', '{
+        "type": "object",
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "required": ["provider"],
+        "properties": {
+            "provider": {
+                "type": "object",
+                "format": "ai-provider",
+                "description": "AI provider + model + credentials resource for the agent."
+            },
+            "system_prompt": { "type": "string", "description": "System prompt for the agent." },
+            "temperature": { "type": "number", "description": "Sampling temperature (0.0-2.0)." },
+            "max_completion_tokens": { "type": "integer", "description": "Maximum output tokens." },
+            "max_iterations": { "type": "integer", "description": "Max reasoning/tool-use loops." },
+            "output_type": { "type": "string", "enum": ["text", "image"], "default": "text" },
+            "output_schema": { "type": "object", "format": "json-schema", "description": "Structured-output JSON schema." },
+            "streaming": { "type": "boolean" },
+            "memory": { "type": "object", "description": "Conversation memory config (off/auto/manual)." },
+            "tools": { "type": "array", "description": "Reusable tool definitions available to the agent." },
+            "evals": {
+                "type": "object",
+                "description": "Eval suite: cases graded by deterministic assertions and/or an LLM judge.",
+                "properties": {
+                    "cases": { "type": "array" },
+                    "judge": { "type": "object" }
+                }
+            }
+        }
+    }'::jsonb,
+    'A reusable AI agent: provider/model, system prompt, tools and an eval suite. Referenced by AI agent flow steps.',
+    now())
+ON CONFLICT (workspace_id, name) DO UPDATE -- row already present: overwrite schema/description in place
+    SET schema = EXCLUDED.schema, description = EXCLUDED.description, edited_at = now();
@@ -160,6 +160,95 @@ impl From<AIAgentArgsRaw> for AIAgentArgs {
     }
 }
 
+// ===========================================================================
+// Reusable AI agent — eval suite types
+//
+// An `ai_agent` resource stores an `AIAgentConfig` (brain + tools + evals). The
+// brain fields mirror `AIAgentArgsRaw` and are merged into `AIAgentArgs` at
+// runtime via a plain JSON merge, so they are not re-declared here. The types
+// below model the eval suite, which the judge/run endpoints inspect directly.
+// ===========================================================================
+
+/// One eval case stored under an `ai_agent` resource's `evals.cases`.
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct AgentEvalCase {
+    pub id: String, // required on deserialize (no serde default)
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub name: Option<String>, // absent key => `None`; `None` is left out of serialized output
+    pub input: EvalInput, // the key itself is required, although every field of `EvalInput` is optional
+    /// LLM-judge acceptance criteria (each a single bullet the output must satisfy).
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub judge_checklist: Vec<String>, // absent key => empty vec; an empty vec is left out when serialized
+    /// Deterministic checks evaluated without an LLM.
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub assertions: Vec<Assertion>, // absent key => empty vec; an empty vec is left out when serialized
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, Default)]
+pub struct EvalInput { // input payload of one eval case (the type of `AgentEvalCase::input`); both fields optional
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub user_message: Option<String>, // absent key => `None`; `None` is left out of serialized output
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub user_attachments: Option<Vec<S3Object>>, // `S3Object` is declared outside this hunk
+}
+
+/// Deterministic, LLM-free check run against an agent's output.
+#[derive(Serialize, Deserialize, Debug, Clone)]
+#[serde(tag = "kind", rename_all = "snake_case")] // internally tagged: the variant is named by a "kind" key, e.g. "contains", "json_path_equals"
+pub enum Assertion {
+    Contains {
+        value: String,
+        #[serde(default)]
+        case_sensitive: bool, // `false` when the key is absent
+    },
+    NotContains {
+        value: String,
+        #[serde(default)]
+        case_sensitive: bool, // `false` when the key is absent
+    },
+    Regex {
+        pattern: String, // NOTE(review): regex engine/dialect is chosen by the evaluator, which is not shown here
+    },
+    /// JSONPath-style dotted path into a structured output equals the given value.
+    JsonPathEquals {
+        path: String,
+        value: serde_json::Value, // expected value; any JSON value is accepted
+    },
+    /// Output validates against the agent's configured `output_schema`.
+    OutputSchemaValid, // unit variant: carries no fields beyond the "kind" tag ("output_schema_valid")
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct AssertionResult { // outcome of one `Assertion`
+    pub assertion: Assertion, // presumably the assertion that was evaluated, echoed back — confirm against the evaluator
+    pub passed: bool,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub detail: Option<String>, // optional text; left out of serialized output when `None`
+}
+
+/// LLM-judge verdict for one case (0-100 score + pass/fail + rationale).
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct JudgeResult {
+    pub score: u8, // documented as 0-100; `u8` itself admits values up to 255, so the bound is not enforced by the type
+    pub pass: bool,
+    pub summary: String, // presumably the rationale mentioned in the type doc — confirm
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct EvalCaseResult { // result of one eval case: output or error, per-assertion results, optional judge verdict, latency
+    pub case_id: String, // presumably the `id` of the `AgentEvalCase` that was run — confirm
+    pub passed: bool, // overall verdict; how it is derived from assertions/judge is not shown here
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub output: Option<Box<RawValue>>, // left out of serialized output when `None`
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub error: Option<String>, // left out of serialized output when `None`
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub assertions: Vec<AssertionResult>, // absent key => empty vec; an empty vec is left out when serialized
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub judge: Option<JudgeResult>, // left out of serialized output when `None`
+    pub latency_ms: u64, // milliseconds, per the name; which interval is measured is not shown here
+}
+
 #[derive(Deserialize, Debug)]
 pub struct ProviderResource {
     #[serde(alias = "apiKey", default, deserialize_with = "empty_string_as_none")]

@@ -6768,6 +6768,79 @@ paths:
               schema:
                 type: string
 
+  /w/{workspace}/ai_agents/run: # single run of a saved agent on one input
+    post:
+      summary: run a saved AI agent once on an input
+      operationId: runAiAgent
+      tags:
+        - ai_agent
+      parameters:
+        - $ref: "#/components/parameters/WorkspaceId"
+      requestBody:
+        description: agent resource path and input
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              required: # only `agent` is mandatory; `input` may be omitted
+                - agent
+              properties:
+                agent:
+                  type: string
+                  description: Path of a saved ai_agent resource
+                input:
+                  type: object
+                  properties:
+                    user_message:
+                      type: string
+                    user_attachments:
+                      type: array
+                      items:
+                        type: object # item shape is left open here
+      responses:
+        "200":
+          description: agent output
+          content:
+            application/json:
+              schema: {} # empty schema: the response body is not constrained
+
+  /w/{workspace}/ai_agents/eval_case: # runs one eval case against a saved agent
+    post:
+      summary: run a single eval case against a saved AI agent
+      operationId: evalAiAgentCase
+      tags:
+        - ai_agent
+      parameters:
+        - $ref: "#/components/parameters/WorkspaceId"
+      requestBody:
+        description: agent resource path, eval case, and optional judge provider
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              required: # `judge_provider` is not listed, so it may be omitted
+                - agent
+                - case
+              properties:
+                agent:
+                  type: string
+                  description: Path of a saved ai_agent resource
+                case:
+                  type: object # fields are named in the description only, not enforced by this schema
+                  description: AgentEvalCase (id, input, judge_checklist, assertions)
+                judge_provider:
+                  type: object
+                  description: Optional ai-provider override for the judge; defaults to the agent's provider
+      responses:
+        "200":
+          description: eval case result (pass/fail, judge verdict, assertion results, output)
+          content:
+            application/json:
+              schema:
+                type: object # NOTE(review): shape not spelled out; presumably the serialized EvalCaseResult — confirm
+
   /w/{workspace}/resources/delete/{path}:
     delete:
       summary: delete resource
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		DELETE FROM resource_type WHERE (workspace_id, name) = ('admins', 'ai_agent'); -- drops the `ai_agent` type row held by the `admins` workspace