Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Down migration: drop the built-in `ai_agent` resource type row that lives
-- in the `admins` workspace. Only that single (workspace_id, name) pair is targeted.
DELETE FROM resource_type
WHERE name = 'ai_agent'
    AND workspace_id = 'admins';
41 changes: 41 additions & 0 deletions backend/migrations/20260627124035_ai_agent_resource_type.up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
-- Built-in `ai_agent` resource type backing reusable AI agent steps.
-- A resource of this type stores an agent's brain (provider/model/system prompt/etc.),
-- its tool set, and its eval suite. Flow steps link to it via FlowModuleValue::AIAgent.agent.
--
-- Seeded into the `admins` workspace: list_resource_types unions `workspace_id = 'admins'`,
-- so this single row is visible from every workspace (existing and future), mirroring how
-- hub-synced built-in types (e.g. s3object) are made globally available.
--
-- Schema notes:
--   * `max_completion_tokens` and `max_iterations` are counts, so they are declared as
--     JSON Schema `integer` rather than `number`; a `number` schema would accept
--     fractional values such as 1.5.
--     NOTE(review): assumes the runtime agent args deserialize these into integer
--     fields -- confirm against AIAgentArgsRaw.
--   * `temperature` stays `number` because it is a fractional sampling parameter.
--
-- Upsert semantics: if an ('admins', 'ai_agent') row already exists, its schema and
-- description are overwritten with the values below and `edited_at` is refreshed.
INSERT INTO resource_type (workspace_id, name, schema, description, edited_at)
VALUES (
    'admins',
    'ai_agent',
    '{
        "type": "object",
        "$schema": "https://json-schema.org/draft/2020-12/schema",
        "required": ["provider"],
        "properties": {
            "provider": {
                "type": "object",
                "format": "ai-provider",
                "description": "AI provider + model + credentials resource for the agent."
            },
            "system_prompt": { "type": "string", "description": "System prompt for the agent." },
            "temperature": { "type": "number", "description": "Sampling temperature (0.0-2.0)." },
            "max_completion_tokens": { "type": "integer", "description": "Maximum output tokens." },
            "max_iterations": { "type": "integer", "description": "Max reasoning/tool-use loops." },
            "output_type": { "type": "string", "enum": ["text", "image"], "default": "text" },
            "output_schema": { "type": "object", "format": "json-schema", "description": "Structured-output JSON schema." },
            "streaming": { "type": "boolean" },
            "memory": { "type": "object", "description": "Conversation memory config (off/auto/manual)." },
            "tools": { "type": "array", "description": "Reusable tool definitions available to the agent." },
            "evals": {
                "type": "object",
                "description": "Eval suite: cases graded by deterministic assertions and/or an LLM judge.",
                "properties": {
                    "cases": { "type": "array" },
                    "judge": { "type": "object" }
                }
            }
        }
    }'::jsonb,
    'A reusable AI agent: provider/model, system prompt, tools and an eval suite. Referenced by AI agent flow steps.',
    now()
)
ON CONFLICT (workspace_id, name) DO UPDATE
SET
    schema = EXCLUDED.schema,
    description = EXCLUDED.description,
    edited_at = now();
89 changes: 89 additions & 0 deletions backend/windmill-ai/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,95 @@ impl From<AIAgentArgsRaw> for AIAgentArgs {
}
}

// ===========================================================================
// Reusable AI agent — eval suite types
//
// An `ai_agent` resource stores an `AIAgentConfig` (brain + tools + evals). The
// brain fields mirror `AIAgentArgsRaw` and are merged into `AIAgentArgs` at
// runtime via a plain JSON merge, so they are not re-declared here. The types
// below model the eval suite, which the judge/run endpoints inspect directly.
// ===========================================================================

/// One eval case stored under an `ai_agent` resource's `evals.cases`.
///
/// `id` and `input` carry no serde default and must be present when
/// deserializing. `name`, `judge_checklist` and `assertions` may be omitted
/// (they default to `None` / empty) and are left out of the serialized form
/// when unset or empty.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct AgentEvalCase {
/// Identifier of the case.
/// NOTE(review): presumably the value echoed back as `EvalCaseResult::case_id` — confirm.
pub id: String,
/// Optional human-readable label; omitted from the serialized form when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub name: Option<String>,
/// Input of the case (user message and/or attachments).
pub input: EvalInput,
/// LLM-judge acceptance criteria (each a single bullet the output must satisfy).
/// Defaults to empty and is omitted from the serialized form when empty.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub judge_checklist: Vec<String>,
/// Deterministic checks evaluated without an LLM.
/// Defaults to empty and is omitted from the serialized form when empty.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub assertions: Vec<Assertion>,
}

/// Input of an eval case (see `AgentEvalCase::input`).
///
/// Both fields are optional: a missing field deserializes to `None`, and a
/// `None` field is omitted from the serialized form. `Default` yields an input
/// with neither a message nor attachments.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct EvalInput {
/// Text message for the case, if any.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub user_message: Option<String>,
/// Attachments for the case, as `S3Object` references, if any.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub user_attachments: Option<Vec<S3Object>>,
}

/// Deterministic, LLM-free check run against an agent's output.
///
/// Serialized as an internally tagged enum: the variant is carried in a
/// `kind` field whose value is the snake_case variant name (e.g.
/// `"contains"`, `"not_contains"`, `"json_path_equals"`,
/// `"output_schema_valid"`), alongside the variant's own fields.
/// The evaluation logic itself lives outside this type.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum Assertion {
/// NOTE(review): presumably passes when the output contains `value` — confirm in the evaluator.
Contains {
value: String,
// Defaults to `false` when the field is omitted from the JSON.
#[serde(default)]
case_sensitive: bool,
},
/// NOTE(review): presumably passes when the output does not contain `value` — confirm in the evaluator.
NotContains {
value: String,
// Defaults to `false` when the field is omitted from the JSON.
#[serde(default)]
case_sensitive: bool,
},
/// Regular-expression check; `pattern` is stored as a plain string.
/// NOTE(review): regex dialect and match mode (search vs. full match) are decided by the evaluator — confirm.
Regex {
pattern: String,
},
/// JSONPath-style dotted path into a structured output equals the given value.
JsonPathEquals {
path: String,
// Arbitrary JSON value to compare against.
value: serde_json::Value,
},
/// Output validates against the agent's configured `output_schema`.
OutputSchemaValid,
}

/// Outcome of evaluating a single `Assertion`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct AssertionResult {
/// The assertion this result refers to, embedded in full.
pub assertion: Assertion,
/// Whether the assertion held.
pub passed: bool,
/// Optional free-form explanation; omitted from the serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub detail: Option<String>,
}

/// LLM-judge verdict for one case (0-100 score + pass/fail + rationale).
///
/// All three fields are required when deserializing.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct JudgeResult {
/// Judge score, intended to be in 0-100.
/// NOTE(review): `u8` admits values up to 255 and nothing in this type
/// enforces the upper bound — confirm the producer clamps or validates it.
pub score: u8,
/// Pass/fail verdict from the judge.
pub pass: bool,
/// Rationale text accompanying the verdict.
pub summary: String,
}

/// Result of running one eval case.
///
/// `output`, `error` and `judge` are omitted from the serialized form when
/// `None`; `assertions` is omitted when empty and defaults to empty when
/// absent on deserialization.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct EvalCaseResult {
/// Identifier of the evaluated case.
/// NOTE(review): presumably copied from `AgentEvalCase::id` — confirm.
pub case_id: String,
/// Overall verdict for the case.
/// NOTE(review): how assertion results and the judge verdict are combined is decided by the producer — confirm.
pub passed: bool,
/// Output kept as raw, unparsed JSON, when available.
#[serde(skip_serializing_if = "Option::is_none")]
pub output: Option<Box<RawValue>>,
/// Error message, when one was recorded.
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<String>,
/// Per-assertion outcomes.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub assertions: Vec<AssertionResult>,
/// LLM-judge verdict, when one was produced.
#[serde(skip_serializing_if = "Option::is_none")]
pub judge: Option<JudgeResult>,
/// Latency in milliseconds.
/// NOTE(review): presumably wall-clock time of the agent run — confirm what interval is measured.
pub latency_ms: u64,
}

#[derive(Deserialize, Debug)]
pub struct ProviderResource {
#[serde(alias = "apiKey", default, deserialize_with = "empty_string_as_none")]
Expand Down
73 changes: 73 additions & 0 deletions backend/windmill-api/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6768,6 +6768,79 @@ paths:
schema:
type: string

# POST /w/{workspace}/ai_agents/run (operationId: runAiAgent)
# Runs a saved `ai_agent` resource once on a caller-supplied input.
# Request body: `agent` (required, path of the saved resource) plus an optional
# `input` object that may carry `user_message` and/or `user_attachments`.
# The 200 response schema is intentionally open (`{}`), i.e. any JSON value.
/w/{workspace}/ai_agents/run:
post:
summary: run a saved AI agent once on an input
operationId: runAiAgent
tags:
- ai_agent
parameters:
- $ref: "#/components/parameters/WorkspaceId"
requestBody:
description: agent resource path and input
required: true
content:
application/json:
schema:
type: object
required:
- agent
properties:
agent:
type: string
description: Path of a saved ai_agent resource
input:
type: object
properties:
user_message:
type: string
user_attachments:
type: array
# NOTE(review): items are untyped objects here; presumably S3 object references — confirm and
# consider a $ref if a matching component schema exists.
items:
type: object
responses:
"200":
description: agent output
content:
application/json:
# Empty schema: the agent output is not constrained to a particular shape.
schema: {}

# POST /w/{workspace}/ai_agents/eval_case (operationId: evalAiAgentCase)
# Runs one eval case against a saved `ai_agent` resource.
# Request body: `agent` and `case` are required; `judge_provider` is optional.
# The 200 response is declared only as a generic object.
/w/{workspace}/ai_agents/eval_case:
post:
summary: run a single eval case against a saved AI agent
operationId: evalAiAgentCase
tags:
- ai_agent
parameters:
- $ref: "#/components/parameters/WorkspaceId"
requestBody:
description: agent resource path, eval case, and optional judge provider
required: true
content:
application/json:
schema:
type: object
required:
- agent
- case
properties:
agent:
type: string
description: Path of a saved ai_agent resource
# Declared as a free-form object; the expected fields are listed in the description only.
case:
type: object
description: AgentEvalCase (id, input, judge_checklist, assertions)
judge_provider:
type: object
description: Optional ai-provider override for the judge; defaults to the agent's provider
responses:
"200":
description: eval case result (pass/fail, judge verdict, assertion results, output)
content:
application/json:
# NOTE(review): generic object; the fields named in the description are not spelled out in the schema.
schema:
type: object

/w/{workspace}/resources/delete/{path}:
delete:
summary: delete resource
Expand Down
Loading
Loading