diff --git a/apps/app/scripts/_util.mjs b/apps/app/scripts/_util.mjs
index 76369d1a9..6809cec0c 100644
--- a/apps/app/scripts/_util.mjs
+++ b/apps/app/scripts/_util.mjs
@@ -47,6 +47,7 @@ export async function spawnOpencodeServe({
hostname = "127.0.0.1",
port,
corsOrigins = [],
+ env = {},
}) {
assert.ok(directory && directory.trim(), "directory is required");
assert.ok(Number.isInteger(port) && port > 0, "port must be a positive integer");
@@ -62,6 +63,7 @@ export async function spawnOpencodeServe({
stdio: ["ignore", "pipe", "pipe"],
env: {
...process.env,
+ ...env,
// Make it explicit we're a non-TUI client.
OPENCODE_CLIENT: "openwork-test",
},
diff --git a/apps/app/scripts/managed-voice-e2e.mjs b/apps/app/scripts/managed-voice-e2e.mjs
new file mode 100644
index 000000000..c159d2ef0
--- /dev/null
+++ b/apps/app/scripts/managed-voice-e2e.mjs
@@ -0,0 +1,278 @@
+import assert from "node:assert/strict";
+import { spawn } from "node:child_process";
+import { createServer } from "node:http";
+import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join, resolve } from "node:path";
+
+import {
+ findFreePort,
+ parseArgs,
+} from "./_util.mjs";
+
+// Command-line options, parsed by the shared parseArgs helper from ./_util.mjs.
+const args = parseArgs(process.argv.slice(2));
+// Workspace directory handed to the OpenWork server; falls back to the current working directory.
+const directory = args.get("dir") ?? process.cwd();
+// Absolute output directory for proof artifacts; the default name embeds the current timestamp.
+const outDir = resolve(args.get("out") ?? join(process.cwd(), "evals", "results", `managed-voice-${Date.now()}`));
+
+// Frames recorded so far, in order; each entry is { file, name, data }.
+const proofFrames = [];
+// Aggregate run result; it is printed as JSON when the script finishes.
+const results = {
+ ok: true,
+ outDir,
+ steps: [],
+};
+
+/**
+ * Escape the characters that are significant in HTML text and in
+ * double-quoted attribute values, so arbitrary values can be embedded in the
+ * generated proof pages.
+ *
+ * @param {unknown} value - Any value; it is coerced with `String()` first.
+ * @returns {string} The text with `&`, `<`, `>` and `"` replaced by entities.
+ */
+function escapeHtml(value) {
+  return String(value)
+    // `&` is replaced first so the entities added below are not escaped again.
+    .replaceAll("&", "&amp;")
+    .replaceAll("<", "&lt;")
+    .replaceAll(">", "&gt;")
+    .replaceAll('"', "&quot;");
+}
+
+/**
+ * Record one proof frame and write it into `outDir` as a numbered HTML file.
+ *
+ * The file name is the 1-based frame index (zero-padded to two digits)
+ * followed by a lower-cased slug of `name`. `data` is passed through
+ * `redactProofData` before it is stored in `proofFrames` or written to disk.
+ *
+ * NOTE(review): the template below shows no HTML tags in this view; the markup
+ * looks like it was stripped during extraction — confirm against the real file.
+ *
+ * @param {string} name - Human-readable frame title.
+ * @param {unknown} data - Payload to show; serialized with JSON.stringify.
+ */
+async function frame(name, data) {
+ const file = `${String(proofFrames.length + 1).padStart(2, "0")}-${name.replace(/[^a-z0-9]+/gi, "-").replace(/^-|-$/g, "").toLowerCase()}.html`;
+ const safeData = redactProofData(data);
+ proofFrames.push({ file, name, data: safeData });
+ await writeFile(join(outDir, file), `
+
+
+
+
+ ${escapeHtml(name)}
+
+
+${escapeHtml(name)}
${escapeHtml(JSON.stringify(safeData, null, 2))}
+`, "utf8");
+}
+
+/**
+ * Return a deep copy of `value` with secret-looking content replaced by
+ * the literal string "[redacted]".
+ *
+ * Two rules apply:
+ * - any object entry whose key mentions token/secret/authorization/api key is
+ *   redacted wholesale, whatever its value;
+ * - any string that starts with a known credential prefix (`owt_`, `ow_inf_`,
+ *   `sk-`) is redacted wherever it appears, including inside arrays and as
+ *   the top-level value.
+ *
+ * @param {unknown} value - Arbitrary JSON-like data.
+ * @returns {unknown} The redacted copy; non-string primitives are returned as-is.
+ */
+function redactProofData(value) {
+  // Checking strings at the value level (not only per object entry) also
+  // covers credentials that sit inside arrays.
+  if (typeof value === "string") {
+    return /^(owt_|ow_inf_|sk-)/.test(value) ? "[redacted]" : value;
+  }
+  if (Array.isArray(value)) return value.map((item) => redactProofData(item));
+  if (!value || typeof value !== "object") return value;
+  return Object.fromEntries(
+    Object.entries(value).map(([key, entry]) => [
+      key,
+      /token|secret|authorization|api.?key/i.test(key) ? "[redacted]" : redactProofData(entry),
+    ]),
+  );
+}
+
+/**
+ * Write `index.html` into `outDir`: a summary page that reports whether the
+ * run passed (from `results.ok`), the output directory, and one section per
+ * entry in `proofFrames`.
+ *
+ * NOTE(review): the per-frame template and the page template show no HTML
+ * tags in this view (the `entry` parameter is not even referenced); the
+ * markup looks stripped by extraction — confirm against the real file.
+ */
+async function renderIndex() {
+ const frames = proofFrames.map((entry) => `
+ `).join("\n");
+ await writeFile(join(outDir, "index.html"), `
+
+
+
+
+ Managed Voice E2E Proof
+
+
+
+ Managed Voice E2E Proof
+ Result: ${results.ok ? "passed" : "failed"} · Output: ${escapeHtml(outDir)}
+${frames}
+
+`, "utf8");
+}
+
+/**
+ * Run one named step of the scenario and track its outcome.
+ *
+ * A "running" entry is appended to `results.steps` immediately. When `fn`
+ * settles, that entry is replaced with either `{ status: "ok", data }` or
+ * `{ status: "error", error }`, and a proof frame is written for the outcome.
+ * On failure `results.ok` is set to false and the original error is rethrown.
+ *
+ * @param {string} name - Step title, also used for the proof frame.
+ * @param {() => unknown} fn - Step body; may be sync or async.
+ * @returns {Promise<unknown>} Resolves with the value produced by `fn`.
+ */
+function step(name, fn) {
+  // `push` returns the new length, so this step's slot is the last index.
+  const slot = results.steps.push({ name, status: "running" }) - 1;
+
+  const recordSuccess = async (data) => {
+    results.steps[slot] = { name, status: "ok", data };
+    await frame(name, data);
+    return data;
+  };
+
+  const recordFailure = async (error) => {
+    results.ok = false;
+    const message = error instanceof Error ? error.message : String(error);
+    results.steps[slot] = { name, status: "error", error: message };
+    await frame(`${name} failure`, { error: message });
+    throw error;
+  };
+
+  return Promise.resolve().then(fn).then(recordSuccess).catch(recordFailure);
+}
+
+/**
+ * Start an in-process HTTP stand-in for the OpenWork Models voice broker.
+ *
+ * Every request is recorded in `requests` (method, url, authorization header
+ * and raw body). Only `POST /voice/realtime/session` carrying
+ * `Authorization: Bearer ow_inf_e2e` gets the canned 200 session payload;
+ * other routes answer 404 and other credentials answer 401.
+ *
+ * @returns {Promise<{ baseUrl: string, requests: object[], close: () => Promise<unknown> }>}
+ *   Handle for the listening server. Rejects if the server cannot bind.
+ */
+async function startMockBroker() {
+  const requests = [];
+  const server = createServer((req, res) => {
+    const sendJson = (status, payload) => {
+      res.writeHead(status, { "content-type": "application/json" });
+      res.end(JSON.stringify(payload));
+    };
+
+    // Decode at the stream level so a multi-byte UTF-8 character that is
+    // split across two chunks is not corrupted.
+    req.setEncoding("utf8");
+    let body = "";
+    req.on("data", (chunk) => {
+      body += chunk;
+    });
+    req.on("end", () => {
+      requests.push({ method: req.method, url: req.url, authorization: req.headers.authorization ?? null, body });
+      if (req.method !== "POST" || req.url !== "/voice/realtime/session") {
+        sendJson(404, { error: "not_found" });
+        return;
+      }
+      if (req.headers.authorization !== "Bearer ow_inf_e2e") {
+        sendJson(401, { error: { code: "invalid_api_key" } });
+        return;
+      }
+      sendJson(200, {
+        ok: true,
+        clientSecret: "managed-e2e-client-secret",
+        expiresAt: 987654321,
+        model: "gpt-realtime-2",
+        transcriptionModel: "gpt-4o-transcribe",
+        tools: ["openwork_snapshot", "openwork_list_actions", "openwork_execute_action"],
+        source: "openwork-models",
+      });
+    });
+  });
+
+  const port = await findFreePort();
+  // Reject on a bind failure (for example the port was taken between
+  // findFreePort and listen) instead of waiting forever for "listening".
+  await new Promise((resolveReady, rejectReady) => {
+    server.once("error", rejectReady);
+    server.listen(port, "127.0.0.1", () => {
+      server.off("error", rejectReady);
+      resolveReady();
+    });
+  });
+
+  return {
+    baseUrl: `http://127.0.0.1:${port}`,
+    requests,
+    close: () => new Promise((resolveClose) => server.close(resolveClose)),
+  };
+}
+
+/**
+ * Spawn the OpenWork server CLI (`bun apps/server/src/cli.ts`) as a child
+ * process bound to 127.0.0.1 on `port`, using fixed client/host tokens.
+ *
+ * The function returns as soon as the process is spawned; it does not wait
+ * for the server to accept requests. stdout and stderr are buffered in memory
+ * and exposed through `getStdout` / `getStderr`.
+ *
+ * @param {object} options
+ * @param {string} options.directory - Passed to the CLI as `--workspace`.
+ * @param {number} options.port - Port passed to the CLI as `--port`.
+ * @param {Record<string, string>} options.env - Extra environment variables,
+ *   layered over `process.env`; `OPENWORK_DEV_MODE` is always forced to "1".
+ * @returns {Promise<object>} Handle with `baseUrl`, `token`, `hostToken`,
+ *   `getStdout`, `getStderr` and an async `close()`.
+ */
+async function startOpenWorkServer({ directory, port, env }) {
+  const token = "owt_managed_voice_client";
+  const hostToken = "owt_managed_voice_host";
+  const child = spawn("bun", [
+    "apps/server/src/cli.ts",
+    "--host", "127.0.0.1",
+    "--port", String(port),
+    "--token", token,
+    "--host-token", hostToken,
+    "--workspace", directory,
+    "--approval", "auto",
+    "--no-log-requests",
+  ], {
+    // Three levels above this script's directory, so the relative CLI path resolves.
+    cwd: resolve(join(import.meta.dirname, "..", "..", "..")),
+    stdio: ["ignore", "pipe", "pipe"],
+    env: { ...process.env, ...env, OPENWORK_DEV_MODE: "1" },
+  });
+  let stdout = "";
+  let stderr = "";
+  child.stdout.setEncoding("utf8");
+  child.stderr.setEncoding("utf8");
+  child.stdout.on("data", (chunk) => { stdout += chunk; });
+  child.stderr.on("data", (chunk) => { stderr += chunk; });
+  const baseUrl = `http://127.0.0.1:${port}`;
+
+  const hasExited = () => child.exitCode !== null || child.signalCode !== null;
+
+  return {
+    baseUrl,
+    token,
+    hostToken,
+    getStdout: () => stdout,
+    getStderr: () => stderr,
+    /** Ask the server to stop with SIGTERM; escalate to SIGKILL after 2.5 s. */
+    async close() {
+      if (hasExited()) return;
+      const exited = new Promise((resolveExit) => child.once("exit", resolveExit));
+      child.kill("SIGTERM");
+      let graceTimer;
+      const graceElapsed = new Promise((resolveTimeout) => {
+        graceTimer = setTimeout(resolveTimeout, 2500);
+      });
+      try {
+        await Promise.race([exited, graceElapsed]);
+      } finally {
+        // Without this the pending timer keeps the event loop (and therefore
+        // the whole script) alive for up to 2.5 s after the child has exited.
+        clearTimeout(graceTimer);
+      }
+      if (!hasExited()) child.kill("SIGKILL");
+    },
+  };
+}
+
+/**
+ * Poll `${baseUrl}/health` until it answers with a 2xx status.
+ *
+ * Each attempt is aborted after 2.5 s, attempts are spaced 250 ms apart, and
+ * polling stops once 30 s have elapsed since the call started.
+ *
+ * @param {string} baseUrl - Server origin without a trailing slash.
+ * @returns {Promise<unknown>} The parsed JSON body of the first healthy response.
+ * @throws {Error} When no healthy response arrives in time; the message
+ *   carries the last status line or fetch error seen.
+ */
+async function waitForServerHealthy(baseUrl) {
+  const healthUrl = `${baseUrl}/health`;
+  const deadline = Date.now() + 30_000;
+  const pause = (ms) => new Promise((resolvePoll) => setTimeout(resolvePoll, ms));
+  let lastError = "";
+
+  while (Date.now() < deadline) {
+    try {
+      const response = await fetch(healthUrl, { signal: AbortSignal.timeout(2500) });
+      if (response.ok) {
+        return response.json();
+      }
+      lastError = `${response.status} ${response.statusText}`;
+    } catch (error) {
+      if (error instanceof Error) {
+        lastError = error.message;
+      } else {
+        lastError = String(error);
+      }
+    }
+    await pause(250);
+  }
+
+  throw new Error(`Timed out waiting for OpenWork server health: ${lastError}`);
+}
+
+// ---------------------------------------------------------------------------
+// Scenario: boot a mock OpenWork Models broker and a real OpenWork server that
+// is configured (via env) to use it, then verify that a voice session request
+// is brokered with the inference API key.
+// ---------------------------------------------------------------------------
+
+// Start from an empty output directory so stale frames never mix with this run.
+await rm(outDir, { recursive: true, force: true });
+await mkdir(outDir, { recursive: true });
+// Throwaway directory for the server's env/token stores; removed during cleanup.
+const envDir = await mkdtemp(join(tmpdir(), "openwork-managed-voice-e2e-"));
+const mockBroker = await startMockBroker();
+const port = await findFreePort();
+const server = await startOpenWorkServer({
+  directory,
+  port,
+  env: {
+    OPENWORK_ENV_STORE: join(envDir, "env.json"),
+    OPENWORK_TOKEN_STORE: join(envDir, "tokens.json"),
+    OPENWORK_API_KEY: "ow_inf_e2e",
+    OPENWORK_INFERENCE_BASE_URL: mockBroker.baseUrl,
+  },
+});
+
+try {
+  await step("server health", async () => waitForServerHealthy(server.baseUrl));
+
+  const owner = await step("owner token", async () => {
+    const response = await fetch(`${server.baseUrl}/tokens`, {
+      method: "POST",
+      headers: { "x-openwork-host-token": server.hostToken, "content-type": "application/json" },
+      body: JSON.stringify({ scope: "owner", label: "managed voice e2e" }),
+    });
+    assert.equal(response.status, 201);
+    const body = await response.json();
+    assert.equal(typeof body.token, "string");
+    return body;
+  });
+
+  const session = await step("managed voice session", async () => {
+    const response = await fetch(`${server.baseUrl}/voice/realtime/session`, {
+      method: "POST",
+      headers: { authorization: `Bearer ${owner.token}`, "content-type": "application/json" },
+      body: JSON.stringify({}),
+    });
+    assert.equal(response.status, 200);
+    const body = await response.json();
+    assert.equal(body.ok, true);
+    assert.equal(body.clientSecret, "managed-e2e-client-secret");
+    assert.equal(body.source, "openwork-models");
+    return body;
+  });
+
+  await step("broker received authenticated request", async () => {
+    assert.equal(mockBroker.requests.length, 1);
+    assert.equal(mockBroker.requests[0].authorization, "Bearer ow_inf_e2e");
+    assert.equal(session.model, "gpt-realtime-2");
+    return mockBroker.requests[0];
+  });
+
+  await renderIndex();
+  console.log(JSON.stringify({ ...results, proof: join(outDir, "index.html") }, null, 2));
+} catch (error) {
+  // Report the failure together with the server's captured output, but still
+  // render the index so the frames written so far can be inspected.
+  const message = error instanceof Error ? error.message : String(error);
+  results.ok = false;
+  results.error = message;
+  results.stderr = server.getStderr();
+  results.stdout = server.getStdout?.() ?? "";
+  await renderIndex();
+  console.error(JSON.stringify({ ...results, proof: join(outDir, "index.html") }, null, 2));
+  process.exitCode = 1;
+} finally {
+  // Nested finally blocks: each resource is released even when an earlier
+  // release throws. The server goes first because it writes into envDir.
+  try {
+    await server.close();
+  } finally {
+    try {
+      await mockBroker.close();
+    } finally {
+      await rm(envDir, { recursive: true, force: true });
+    }
+  }
+}
diff --git a/apps/app/src/app/lib/openwork-server.ts b/apps/app/src/app/lib/openwork-server.ts
index 5ab36f7d4..edf04b635 100644
--- a/apps/app/src/app/lib/openwork-server.ts
+++ b/apps/app/src/app/lib/openwork-server.ts
@@ -1805,6 +1805,7 @@ export function createOpenworkServerClient(options: { baseUrl: string; token?: s
model: string;
transcriptionModel: string;
tools: string[];
+ source?: string;
}>(baseUrl, "/voice/realtime/session", {
token,
hostToken,
diff --git a/apps/app/src/react-app/domains/connections/provider-auth/store.ts b/apps/app/src/react-app/domains/connections/provider-auth/store.ts
index e2c8aee85..93177fc50 100644
--- a/apps/app/src/react-app/domains/connections/provider-auth/store.ts
+++ b/apps/app/src/react-app/domains/connections/provider-auth/store.ts
@@ -353,6 +353,27 @@ export function createProviderAuthStore(options: CreateProviderAuthStoreOptions)
return next;
};
+ /**
+  * Derive the inference service origin for a cloud provider connection.
+  *
+  * `providerConfig.options.baseURL` is preferred, then `providerConfig.api`.
+  * The chosen value is trimmed and a trailing `/api/v1` (with or without a
+  * final slash) is removed. Returns "" when neither is a non-blank string.
+  */
+ const readCloudProviderBaseUrl = (provider: DenOrgLlmProviderConnection) => {
+   const stripApiSuffix = (raw: string) => raw.trim().replace(/\/api\/v1\/?$/, "");
+
+   // Named distinctly from the store-level `options` in the enclosing scope.
+   const providerOptions = provider.providerConfig.options;
+   if (providerOptions && typeof providerOptions === "object" && !Array.isArray(providerOptions)) {
+     const baseURL = "baseURL" in providerOptions ? providerOptions.baseURL : undefined;
+     if (typeof baseURL === "string" && baseURL.trim()) return stripApiSuffix(baseURL);
+   }
+
+   const { api } = provider.providerConfig;
+   if (typeof api === "string" && api.trim()) return stripApiSuffix(api);
+   return "";
+ };
+
+ /**
+  * Copy an OpenWork-sourced provider's API key (and base URL, when one can be
+  * derived) into the OpenWork server's user env as `OPENWORK_API_KEY` and
+  * `OPENWORK_INFERENCE_BASE_URL`.
+  *
+  * Does nothing when the provider's source is not "openwork", the key is
+  * blank, or no OpenWork server client is currently available.
+  */
+ const mirrorOpenWorkModelsVoiceEnv = async (provider: DenOrgLlmProviderConnection, apiKey: string) => {
+   const trimmedKey = apiKey.trim();
+   if (provider.source !== "openwork" || !trimmedKey) return;
+
+   const { openworkServerClient } = options.openworkServer.getSnapshot();
+   if (!openworkServerClient) return;
+
+   const inferenceBaseUrl = readCloudProviderBaseUrl(provider);
+   await openworkServerClient.upsertUserEnv([
+     { key: "OPENWORK_API_KEY", value: trimmedKey },
+     // The base URL entry is only written when one was found.
+     ...(inferenceBaseUrl ? [{ key: "OPENWORK_INFERENCE_BASE_URL", value: inferenceBaseUrl }] : []),
+   ]);
+ };
+
const readWorkspaceOpenworkConfigRecord = async (): Promise<
Record
> => {
@@ -1395,6 +1416,7 @@ export function createProviderAuthStore(options: CreateProviderAuthStoreOptions)
providerID: localProviderId,
auth: { type: "api", key: apiKey },
});
+ await mirrorOpenWorkModelsVoiceEnv(provider, apiKey);
}
if (existingImported?.providerId && existingImported.providerId !== localProviderId) {
try {
diff --git a/apps/server/src/env-routes.e2e.test.ts b/apps/server/src/env-routes.e2e.test.ts
index 86b838b62..f8d09b88e 100644
--- a/apps/server/src/env-routes.e2e.test.ts
+++ b/apps/server/src/env-routes.e2e.test.ts
@@ -17,6 +17,8 @@ const dirs: string[] = [];
const priorEnvStore = process.env.OPENWORK_ENV_STORE;
const priorTokenStore = process.env.OPENWORK_TOKEN_STORE;
const priorOpenAiApiKey = process.env.OPENAI_API_KEY;
+const priorOpenWorkApiKey = process.env.OPENWORK_API_KEY;
+const priorOpenWorkInferenceBaseUrl = process.env.OPENWORK_INFERENCE_BASE_URL;
const nativeFetch = globalThis.fetch;
function baseConfig(): ServerConfig {
@@ -82,6 +84,16 @@ afterEach(async () => {
} else {
process.env.OPENAI_API_KEY = priorOpenAiApiKey;
}
+ if (priorOpenWorkApiKey === undefined) {
+ delete process.env.OPENWORK_API_KEY;
+ } else {
+ process.env.OPENWORK_API_KEY = priorOpenWorkApiKey;
+ }
+ if (priorOpenWorkInferenceBaseUrl === undefined) {
+ delete process.env.OPENWORK_INFERENCE_BASE_URL;
+ } else {
+ process.env.OPENWORK_INFERENCE_BASE_URL = priorOpenWorkInferenceBaseUrl;
+ }
globalThis.fetch = nativeFetch;
});
@@ -355,6 +367,57 @@ describe("env routes", () => {
});
});
+ // With an OpenWork Models key/base URL and an OpenAI key all present in the
+ // process environment, the session route is expected to return the broker's
+ // payload rather than go to OpenAI directly.
+ test("voice realtime session prefers OpenWork Models broker when configured", async () => {
+ process.env.OPENWORK_API_KEY = "ow_inf_test";
+ process.env.OPENWORK_INFERENCE_BASE_URL = "https://inference.example.test";
+ process.env.OPENAI_API_KEY = "sk-should-not-be-used";
+ const { base } = await boot();
+
+ // Stub global fetch: answer the broker URL with a canned session (after
+ // checking the bearer key), make the direct OpenAI endpoint fail with 500,
+ // and pass every other request (such as calls to the test server) through.
+ globalThis.fetch = ((input, init) => {
+ const url = String(input);
+ if (url === "https://inference.example.test/voice/realtime/session") {
+ expect(init?.headers).toMatchObject({ Authorization: "Bearer ow_inf_test" });
+ return Promise.resolve(new Response(JSON.stringify({
+ ok: true,
+ clientSecret: "managed-rt-secret",
+ expiresAt: 456,
+ model: "gpt-realtime-2",
+ transcriptionModel: "gpt-4o-transcribe",
+ tools: ["openwork_snapshot"],
+ source: "openwork-models",
+ }), {
+ status: 200,
+ headers: { "content-type": "application/json" },
+ }));
+ }
+ if (url === "https://api.openai.com/v1/realtime/client_secrets") {
+ return Promise.resolve(new Response("direct OpenAI should not be called", { status: 500 }));
+ }
+ return nativeFetch(input, init);
+ }) as typeof fetch;
+
+ // Issue an owner-scoped token through the host-authenticated /tokens route.
+ const issued = await fetch(`${base}/tokens`, {
+ method: "POST",
+ headers: hostAuth(),
+ body: JSON.stringify({ scope: "owner", label: "managed voice owner" }),
+ });
+ const tokenBody = (await issued.json()) as { token: string };
+
+ const response = await fetch(`${base}/voice/realtime/session`, {
+ method: "POST",
+ headers: { authorization: `Bearer ${tokenBody.token}`, "content-type": "application/json" },
+ body: JSON.stringify({}),
+ });
+
+ // The broker's payload should come back to the caller unchanged in these fields.
+ expect(response.status).toBe(200);
+ expect(await response.json()).toMatchObject({
+ ok: true,
+ clientSecret: "managed-rt-secret",
+ expiresAt: 456,
+ source: "openwork-models",
+ });
+ });
+
test("values persist across server restart", async () => {
const first = await boot();
await fetch(`${first.base}/env`, {
diff --git a/apps/server/src/server.ts b/apps/server/src/server.ts
index 8c5c77b67..7e4c66859 100644
--- a/apps/server/src/server.ts
+++ b/apps/server/src/server.ts
@@ -277,6 +277,26 @@ async function resolveOpenAiRealtimeApiKey(env: EnvService): Promise {
"";
}
+/**
+ * Resolve the OpenWork Models broker settings used for managed voice.
+ *
+ * Each setting is looked up under two names (`OPENWORK_*` first, then
+ * `OPENWORK_MODELS_*`). Records from the env service are consulted for both
+ * names before `process.env` is consulted for both names; the first
+ * non-blank trimmed value wins.
+ *
+ * @returns `{ apiKey, baseUrl }` with trailing slashes stripped from
+ *   `baseUrl`, or `null` when either the key or the base URL is missing.
+ */
+async function resolveOpenWorkModelsVoiceConfig(env: EnvService): Promise<{ baseUrl: string; apiKey: string } | null> {
+  const records = await env.list();
+
+  const firstConfigured = (names: string[]): string => {
+    for (const name of names) {
+      const stored = records.find((entry) => entry.key === name)?.value.trim();
+      if (stored) return stored;
+    }
+    for (const name of names) {
+      const fromProcess = process.env[name]?.trim();
+      if (fromProcess) return fromProcess;
+    }
+    return "";
+  };
+
+  const apiKey = firstConfigured(["OPENWORK_API_KEY", "OPENWORK_MODELS_API_KEY"]);
+  if (!apiKey) return null;
+
+  const baseUrl = firstConfigured(["OPENWORK_INFERENCE_BASE_URL", "OPENWORK_MODELS_BASE_URL"]);
+  if (!baseUrl) return null;
+
+  return { apiKey, baseUrl: baseUrl.replace(/\/+$/, "") };
+}
+
function openworkVoiceRealtimeInstructions() {
return `# Role and Objective
@@ -323,6 +343,34 @@ function readOpenAiClientSecret(payload: unknown): { clientSecret: string; expir
}
async function createOpenAiRealtimeVoiceSession(env: EnvService, input: unknown) {
+ const managedVoice = await resolveOpenWorkModelsVoiceConfig(env);
+ if (managedVoice) {
+ const response = await fetch(`${managedVoice.baseUrl}/voice/realtime/session`, {
+ method: "POST",
+ headers: {
+ Authorization: `Bearer ${managedVoice.apiKey}`,
+ "Content-Type": "application/json",
+ },
+ body: JSON.stringify(input ?? {}),
+ });
+ const text = await response.text();
+ let payload: unknown = null;
+ try {
+ payload = text ? JSON.parse(text) : null;
+ } catch {
+ payload = null;
+ }
+ if (!response.ok) {
+ const errorPayload = isRecord(payload) && isRecord(payload.error) ? payload.error : null;
+ const message = typeof errorPayload?.message === "string" ? errorPayload.message : response.statusText;
+ throw new ApiError(response.status, "openwork_models_voice_failed", message || "OpenWork Models could not create a voice session");
+ }
+ if (!isRecord(payload) || payload.ok !== true || typeof payload.clientSecret !== "string") {
+ throw new ApiError(502, "openwork_models_voice_invalid_response", "OpenWork Models did not return a usable Realtime client secret");
+ }
+ return payload;
+ }
+
const apiKey = await resolveOpenAiRealtimeApiKey(env);
if (!apiKey) {
throw new ApiError(
diff --git a/ee/apps/inference/src/app.ts b/ee/apps/inference/src/app.ts
index 62e4908ba..6caa2ffef 100644
--- a/ee/apps/inference/src/app.ts
+++ b/ee/apps/inference/src/app.ts
@@ -8,6 +8,7 @@ import { logger } from "hono/logger";
import { z } from "zod";
import { env } from "./env.js";
import { registerProxyRoutes } from "./proxy.js";
+import { registerVoiceRoutes } from "./voice.js";
import { registerWebhookRoutes } from "./webhooks.js";
const srcDir = path.dirname(fileURLToPath(import.meta.url));
@@ -65,6 +66,7 @@ if (shouldServeLocalModelCatalog) {
}
registerProxyRoutes(app);
+registerVoiceRoutes(app);
registerWebhookRoutes(app);
app.onError((error, c) => {
diff --git a/ee/apps/inference/src/env.ts b/ee/apps/inference/src/env.ts
index e7aca8ebe..66aa5ac3e 100644
--- a/ee/apps/inference/src/env.ts
+++ b/ee/apps/inference/src/env.ts
@@ -14,6 +14,8 @@ const EnvSchema = z
DEN_DB_ENCRYPTION_KEY: z.string().trim().min(32),
INFERENCE_PROXY_BASE_URL: z.string().optional(),
OPENROUTER_UPSTREAM_URL: z.string().optional(),
+ OPENAI_REALTIME_API_KEY: z.string().optional(),
+ OPENAI_API_KEY: z.string().optional(),
INFERENCE_ADMIN_TOKEN: z.string().optional(),
INFERENCE_WEBHOOK_SECRET: z.string().optional(),
INFERENCE_CREDITS_PER_DOLLAR: z.string().optional(),
@@ -120,6 +122,7 @@ export const env = {
openRouterUpstreamUrl: normalizeUrl(
parsed.OPENROUTER_UPSTREAM_URL ?? "https://openrouter.ai/api/v1",
),
+ openAiRealtimeApiKey: optionalString(parsed.OPENAI_REALTIME_API_KEY) ?? optionalString(parsed.OPENAI_API_KEY),
adminToken: optionalString(parsed.INFERENCE_ADMIN_TOKEN),
webhookSecret: optionalString(parsed.INFERENCE_WEBHOOK_SECRET),
creditsPerDollar: parseCreditsPerDollar(parsed.INFERENCE_CREDITS_PER_DOLLAR),
diff --git a/ee/apps/inference/src/voice.ts b/ee/apps/inference/src/voice.ts
new file mode 100644
index 000000000..d6adf715b
--- /dev/null
+++ b/ee/apps/inference/src/voice.ts
@@ -0,0 +1,192 @@
+import { createHash } from "node:crypto"
+import type { Hono } from "hono"
+import { env } from "./env.js"
+import { findActiveInferenceKey } from "./keys.js"
+import { ensureUsableBuckets } from "./limits.js"
+
+// Realtime model used when the request body does not name one.
+const OPENWORK_VOICE_REALTIME_MODEL = "gpt-realtime-2"
+// Model used for input-audio transcription in every session.
+const OPENWORK_VOICE_TRANSCRIPTION_MODEL = "gpt-4o-transcribe"
+
+// Function tools attached to every realtime session. Only the tool names are
+// echoed back to the caller in the session response.
+const OPENWORK_VOICE_REALTIME_TOOLS = [
+ {
+ type: "function",
+ name: "openwork_snapshot",
+ description: "Read the current OpenWork UI control snapshot: route, status, narration, and visible action metadata.",
+ parameters: { type: "object", properties: {}, additionalProperties: false },
+ },
+ {
+ type: "function",
+ name: "openwork_list_actions",
+ description: "List semantic OpenWork UI actions. Call this before openwork_execute_action when you do not know the exact action id.",
+ parameters: { type: "object", properties: {}, additionalProperties: false },
+ },
+ {
+ type: "function",
+ name: "openwork_execute_action",
+ description: "Execute a semantic OpenWork UI action by id. Prefer this over screen coordinates or DOM guessing.",
+ parameters: {
+ type: "object",
+ properties: {
+ actionId: { type: "string", description: "The action id from openwork_list_actions, such as composer.set_text or composer.send." },
+ args: { type: "object", description: "Optional JSON arguments for the action.", additionalProperties: true },
+ },
+ required: ["actionId"],
+ additionalProperties: false,
+ },
+ },
+]
+
+/**
+ * Extract the caller's API key from a request.
+ *
+ * A bearer `Authorization` header (scheme matched case-insensitively) takes
+ * precedence; otherwise the `x-api-key` header is used. Returns the trimmed
+ * key, or `null` when neither header supplies one.
+ */
+function readApiKey(request: Request) {
+  const bearerPrefix = "bearer "
+  const authorization = request.headers.get("authorization")
+  if (authorization && authorization.toLowerCase().startsWith(bearerPrefix)) {
+    return authorization.slice(bearerPrefix.length).trim()
+  }
+
+  const headerKey = request.headers.get("x-api-key")
+  return headerKey?.trim() ?? null
+}
+
+/**
+ * Build a 32-character lowercase hex id for correlating one request.
+ * The id is the first half of a SHA-256 digest over the current time and a
+ * `Math.random()` value, so it is not suitable as a secret.
+ */
+function buildRequestId() {
+  const seed = `${Date.now()}:${Math.random()}`
+  const digest = createHash("sha256").update(seed).digest("hex")
+  return digest.slice(0, 32)
+}
+
+/**
+ * Type guard for plain object-like values: true for any non-null object that
+ * is not an array. Narrows to a string-keyed record of unknown values so
+ * callers must check each property before using it.
+ */
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === "object" && value !== null && !Array.isArray(value)
+}
+
+/**
+ * Read `value[key]` as a trimmed string.
+ * Returns "" when `value` is not a record or the field is not a string.
+ */
+function readStringField(value: unknown, key: string) {
+  const field = isRecord(value) ? value[key] : undefined
+  if (typeof field !== "string") return ""
+  return field.trim()
+}
+
+/**
+ * Pull the ephemeral client secret (and its expiry, when present) out of an
+ * OpenAI realtime client-secret response.
+ *
+ * Three payload shapes are accepted:
+ * - `{ client_secret: "..." }`                      (no expiry available)
+ * - `{ client_secret: { value, expires_at } }`
+ * - `{ value, expires_at }`                         (secret at the top level)
+ *
+ * @returns `clientSecret` is "" when no usable secret was found; `expiresAt`
+ *   is `null` when the payload carries no numeric expiry.
+ */
+function readOpenAiClientSecret(payload: unknown): { clientSecret: string; expiresAt: number | null } {
+  if (!isRecord(payload)) return { clientSecret: "", expiresAt: null }
+
+  const readExpiry = (holder: Record<string, unknown>) =>
+    typeof holder.expires_at === "number" ? holder.expires_at : null
+
+  const clientSecret = payload.client_secret
+  if (typeof clientSecret === "string") return { clientSecret, expiresAt: null }
+  if (isRecord(clientSecret)) {
+    const value = typeof clientSecret.value === "string" ? clientSecret.value : ""
+    return { clientSecret: value, expiresAt: readExpiry(clientSecret) }
+  }
+
+  // Top-level shape: the expiry sits next to `value`, so read it here as well
+  // instead of always reporting `null`.
+  const value = typeof payload.value === "string" ? payload.value : ""
+  return { clientSecret: value, expiresAt: readExpiry(payload) }
+}
+
+/**
+ * System instructions sent with every realtime session: the assistant's role,
+ * the policy for using the OpenWork UI tools, and the expected voice style.
+ * The text is returned verbatim as a single multi-line string.
+ */
+function openworkVoiceRealtimeInstructions() {
+ return `# Role and Objective
+
+You are OpenWork Voice Mode, a voice-first control layer inside OpenWork.
+Help the user control OpenWork by using the semantic OpenWork UI tools.
+
+# Tool Policy
+
+- Prefer openwork_snapshot, openwork_list_actions, and openwork_execute_action over visual guessing.
+- If the user asks to write or draft something, use composer.set_text.
+- If the user asks to send or run the current prompt, use composer.send.
+- For navigation, settings, session, transcript, and composer work, inspect the action list first if the action id is unknown.
+- Do not claim an action completed until the tool succeeds.
+- Ask for confirmation before destructive actions such as deleting a session.
+
+# Voice Style
+
+- Be concise, calm, and direct.
+- If audio is unclear, ask the user to repeat it instead of guessing.
+- Ignore background speech that is not addressed to OpenWork.
+- Summarize tool results briefly and offer the next useful step.`
+}
+
+/**
+ * Ask OpenAI for an ephemeral realtime client secret and wrap the outcome in
+ * a JSON `Response`.
+ *
+ * @param input - Parsed request body; only a string `model` field is read,
+ *   falling back to `OPENWORK_VOICE_REALTIME_MODEL`.
+ * @param openworkRequestId - Correlation id echoed in the success payload.
+ * @returns 200 with the session details on success; 503 when no OpenAI key is
+ *   configured; the upstream status when OpenAI rejects the request; 502 when
+ *   OpenAI answers without a usable client secret.
+ */
+async function createOpenAiRealtimeClientSecret(input: unknown, openworkRequestId: string) {
+  // Every failure shares the `{ error: { message, type, code } }` envelope.
+  const fail = (status: number, type: string, code: string, message: string) =>
+    Response.json({ error: { message, type, code } }, { status })
+
+  const apiKey = env.openAiRealtimeApiKey
+  if (!apiKey) {
+    return fail(503, "invalid_request_error", "openai_realtime_key_missing", "Managed voice is not configured.")
+  }
+
+  const model = readStringField(input, "model") || OPENWORK_VOICE_REALTIME_MODEL
+  const session = {
+    type: "realtime",
+    model,
+    output_modalities: ["audio"],
+    audio: {
+      input: {
+        transcription: { model: OPENWORK_VOICE_TRANSCRIPTION_MODEL, language: "en" },
+        turn_detection: {
+          type: "server_vad",
+          threshold: 0.58,
+          silence_duration_ms: 320,
+          prefix_padding_ms: 300,
+          create_response: true,
+          interrupt_response: true,
+        },
+      },
+    },
+    instructions: openworkVoiceRealtimeInstructions(),
+    tool_choice: "auto",
+    tools: OPENWORK_VOICE_REALTIME_TOOLS,
+  }
+
+  const response = await fetch("https://api.openai.com/v1/realtime/client_secrets", {
+    method: "POST",
+    headers: {
+      Authorization: `Bearer ${apiKey}`,
+      "Content-Type": "application/json",
+    },
+    body: JSON.stringify({ session }),
+  })
+
+  // Read the body as text first so an empty or non-JSON reply becomes `null`
+  // instead of throwing.
+  const text = await response.text()
+  let payload: unknown = null
+  if (text) {
+    try {
+      payload = JSON.parse(text)
+    } catch {
+      payload = null
+    }
+  }
+
+  if (!response.ok) {
+    const upstreamError = isRecord(payload) && isRecord(payload.error) ? payload.error : null
+    const upstreamMessage = typeof upstreamError?.message === "string" ? upstreamError.message : response.statusText
+    return fail(response.status, "api_error", "openai_realtime_failed", upstreamMessage || "Failed to create OpenAI Realtime session")
+  }
+
+  const { clientSecret, expiresAt } = readOpenAiClientSecret(payload)
+  if (!clientSecret) {
+    return fail(502, "api_error", "openai_realtime_invalid_response", "OpenAI did not return a usable Realtime client secret.")
+  }
+
+  return Response.json({
+    ok: true,
+    clientSecret,
+    expiresAt,
+    model,
+    transcriptionModel: OPENWORK_VOICE_TRANSCRIPTION_MODEL,
+    tools: OPENWORK_VOICE_REALTIME_TOOLS.map((tool) => tool.name),
+    source: "openwork-models",
+    openworkRequestId,
+  })
+}
+
+/**
+ * Register `POST /voice/realtime/session` on the app.
+ *
+ * The handler authenticates the caller's inference API key (401 when missing
+ * or not active), checks the organization's usage buckets (429 when they are
+ * not usable), and then returns the result of
+ * `createOpenAiRealtimeClientSecret`. A missing or unparsable JSON body is
+ * treated as `{}`.
+ */
+export function registerVoiceRoutes(app: Hono) {
+  // Shared envelope for the two authentication failures below.
+  const authenticationError = (message: string, code: string) => ({
+    error: { message, type: "authentication_error", code },
+  })
+
+  app.post("/voice/realtime/session", async (c) => {
+    const rawKey = readApiKey(c.req.raw)
+    if (!rawKey) {
+      return c.json(authenticationError("Missing OpenWork inference API key.", "missing_api_key"), 401)
+    }
+
+    const inferenceKey = await findActiveInferenceKey(rawKey)
+    if (!inferenceKey) {
+      return c.json(authenticationError("Invalid OpenWork inference API key.", "invalid_api_key"), 401)
+    }
+
+    const organizationId = inferenceKey.organization_id
+    const limits = await ensureUsableBuckets(organizationId)
+    if (!limits.ok) {
+      return c.json({ error: { message: `Rate limit reached for organization ${organizationId}.`, type: "tokens", code: "rate_limit_exceeded" } }, 429)
+    }
+
+    const openworkRequestId = buildRequestId()
+    let body: unknown
+    try {
+      body = await c.req.json()
+    } catch {
+      // No body, or a body that is not valid JSON: continue with an empty request.
+      body = {}
+    }
+
+    return createOpenAiRealtimeClientSecret(body, openworkRequestId)
+  })
+}
diff --git a/evals/results/managed-voice-pr-proof/01-server-health.html b/evals/results/managed-voice-pr-proof/01-server-health.html
new file mode 100644
index 000000000..520fa0511
--- /dev/null
+++ b/evals/results/managed-voice-pr-proof/01-server-health.html
@@ -0,0 +1,20 @@
+
+
+
+
+
+ server health
+
+
+server health
{
+ "ok": true,
+ "version": "0.17.1",
+ "opencodeVersion": "1.17.3",
+ "uptimeMs": 207
+}
+
\ No newline at end of file
diff --git a/evals/results/managed-voice-pr-proof/02-owner-token.html b/evals/results/managed-voice-pr-proof/02-owner-token.html
new file mode 100644
index 000000000..ad0395899
--- /dev/null
+++ b/evals/results/managed-voice-pr-proof/02-owner-token.html
@@ -0,0 +1,21 @@
+
+
+
+
+
+ owner token
+
+
+owner token
{
+ "id": "4af562f6-1472-4c54-b275-4ce25d70f568",
+ "token": "[redacted]",
+ "scope": "owner",
+ "createdAt": 1781986782006,
+ "label": "managed voice e2e"
+}
+
\ No newline at end of file
diff --git a/evals/results/managed-voice-pr-proof/03-managed-voice-session.html b/evals/results/managed-voice-pr-proof/03-managed-voice-session.html
new file mode 100644
index 000000000..a1c4f3fdd
--- /dev/null
+++ b/evals/results/managed-voice-pr-proof/03-managed-voice-session.html
@@ -0,0 +1,27 @@
+
+
+
+
+
+ managed voice session
+
+
+managed voice session
{
+ "ok": true,
+ "clientSecret": "[redacted]",
+ "expiresAt": 987654321,
+ "model": "gpt-realtime-2",
+ "transcriptionModel": "gpt-4o-transcribe",
+ "tools": [
+ "openwork_snapshot",
+ "openwork_list_actions",
+ "openwork_execute_action"
+ ],
+ "source": "openwork-models"
+}
+
\ No newline at end of file
diff --git a/evals/results/managed-voice-pr-proof/04-broker-received-authenticated-request.html b/evals/results/managed-voice-pr-proof/04-broker-received-authenticated-request.html
new file mode 100644
index 000000000..70b979307
--- /dev/null
+++ b/evals/results/managed-voice-pr-proof/04-broker-received-authenticated-request.html
@@ -0,0 +1,20 @@
+
+
+
+
+
+ broker received authenticated request
+
+
+broker received authenticated request
{
+ "method": "POST",
+ "url": "/voice/realtime/session",
+ "authorization": "[redacted]",
+ "body": "{}"
+}
+
\ No newline at end of file
diff --git a/evals/results/managed-voice-pr-proof/index.html b/evals/results/managed-voice-pr-proof/index.html
new file mode 100644
index 000000000..2aa159921
--- /dev/null
+++ b/evals/results/managed-voice-pr-proof/index.html
@@ -0,0 +1,45 @@
+
+
+
+
+
+ Managed Voice E2E Proof
+
+
+
+ Managed Voice E2E Proof
+ Result: passed · Output: /Users/benjaminshafii/openwork-enterprise/_repos/openwork-managed-voice/evals/results/managed-voice-pr-proof
+
+
+
+
+
+
+
+
+ broker received authenticated request
+
+ Open frame
+
+
+
\ No newline at end of file