diff --git a/apps/app/scripts/_util.mjs b/apps/app/scripts/_util.mjs index 76369d1a9..6809cec0c 100644 --- a/apps/app/scripts/_util.mjs +++ b/apps/app/scripts/_util.mjs @@ -47,6 +47,7 @@ export async function spawnOpencodeServe({ hostname = "127.0.0.1", port, corsOrigins = [], + env = {}, }) { assert.ok(directory && directory.trim(), "directory is required"); assert.ok(Number.isInteger(port) && port > 0, "port must be a positive integer"); @@ -62,6 +63,7 @@ export async function spawnOpencodeServe({ stdio: ["ignore", "pipe", "pipe"], env: { ...process.env, + ...env, // Make it explicit we're a non-TUI client. OPENCODE_CLIENT: "openwork-test", }, diff --git a/apps/app/scripts/managed-voice-e2e.mjs b/apps/app/scripts/managed-voice-e2e.mjs new file mode 100644 index 000000000..c159d2ef0 --- /dev/null +++ b/apps/app/scripts/managed-voice-e2e.mjs @@ -0,0 +1,278 @@ +import assert from "node:assert/strict"; +import { spawn } from "node:child_process"; +import { createServer } from "node:http"; +import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join, resolve } from "node:path"; + +import { + findFreePort, + parseArgs, +} from "./_util.mjs"; + +const args = parseArgs(process.argv.slice(2)); +const directory = args.get("dir") ?? process.cwd(); +const outDir = resolve(args.get("out") ?? 
join(process.cwd(), "evals", "results", `managed-voice-${Date.now()}`)); + +const proofFrames = []; +const results = { + ok: true, + outDir, + steps: [], +}; + +function escapeHtml(value) { + return String(value) + .replaceAll("&", "&amp;") + .replaceAll("<", "&lt;") + .replaceAll(">", "&gt;") + .replaceAll('"', "&quot;"); +} + +async function frame(name, data) { + const file = `${String(proofFrames.length + 1).padStart(2, "0")}-${name.replace(/[^a-z0-9]+/gi, "-").replace(/^-|-$/g, "").toLowerCase()}.html`; + const safeData = redactProofData(data); + proofFrames.push({ file, name, data: safeData }); + await writeFile(join(outDir, file), ` + + + + + ${escapeHtml(name)} + + +

${escapeHtml(name)}

${escapeHtml(JSON.stringify(safeData, null, 2))}
+`, "utf8"); +} + +function redactProofData(value) { + if (Array.isArray(value)) return value.map(redactProofData); + if (!value || typeof value !== "object") return value; + return Object.fromEntries(Object.entries(value).map(([key, entry]) => { + if (/token|secret|authorization|api.?key/i.test(key)) return [key, "[redacted]"]; + if (typeof entry === "string" && /^(owt_|ow_inf_|sk-)/.test(entry)) return [key, "[redacted]"]; + return [key, redactProofData(entry)]; + })); +} + +async function renderIndex() { + const frames = proofFrames.map((entry) => ` +
+

${escapeHtml(entry.name)}

+ +

Open frame

+
`).join("\n"); + await writeFile(join(outDir, "index.html"), ` + + + + + Managed Voice E2E Proof + + +
+

Managed Voice E2E Proof

+
Result: ${results.ok ? "passed" : "failed"} · Output: ${escapeHtml(outDir)}
+${frames} +
+`, "utf8"); +} + +function step(name, fn) { + results.steps.push({ name, status: "running" }); + const idx = results.steps.length - 1; + return Promise.resolve() + .then(fn) + .then(async (data) => { + results.steps[idx] = { name, status: "ok", data }; + await frame(name, data); + return data; + }) + .catch(async (error) => { + results.ok = false; + const message = error instanceof Error ? error.message : String(error); + results.steps[idx] = { name, status: "error", error: message }; + await frame(`${name} failure`, { error: message }); + throw error; + }); +} + +async function startMockBroker() { + const requests = []; + const server = createServer((req, res) => { + let body = ""; + req.on("data", (chunk) => { + body += String(chunk); + }); + req.on("end", () => { + requests.push({ method: req.method, url: req.url, authorization: req.headers.authorization ?? null, body }); + if (req.method !== "POST" || req.url !== "/voice/realtime/session") { + res.writeHead(404, { "content-type": "application/json" }); + res.end(JSON.stringify({ error: "not_found" })); + return; + } + if (req.headers.authorization !== "Bearer ow_inf_e2e") { + res.writeHead(401, { "content-type": "application/json" }); + res.end(JSON.stringify({ error: { code: "invalid_api_key" } })); + return; + } + res.writeHead(200, { "content-type": "application/json" }); + res.end(JSON.stringify({ + ok: true, + clientSecret: "managed-e2e-client-secret", + expiresAt: 987654321, + model: "gpt-realtime-2", + transcriptionModel: "gpt-4o-transcribe", + tools: ["openwork_snapshot", "openwork_list_actions", "openwork_execute_action"], + source: "openwork-models", + })); + }); + }); + const port = await findFreePort(); + await new Promise((resolveReady) => server.listen(port, "127.0.0.1", resolveReady)); + return { + baseUrl: `http://127.0.0.1:${port}`, + requests, + close: () => new Promise((resolveClose) => server.close(resolveClose)), + }; +} + +async function startOpenWorkServer({ directory, port, env }) { + 
const token = "owt_managed_voice_client"; + const hostToken = "owt_managed_voice_host"; + const child = spawn("bun", [ + "apps/server/src/cli.ts", + "--host", "127.0.0.1", + "--port", String(port), + "--token", token, + "--host-token", hostToken, + "--workspace", directory, + "--approval", "auto", + "--no-log-requests", + ], { + cwd: resolve(join(import.meta.dirname, "..", "..", "..")), + stdio: ["ignore", "pipe", "pipe"], + env: { ...process.env, ...env, OPENWORK_DEV_MODE: "1" }, + }); + let stdout = ""; + let stderr = ""; + child.stdout.setEncoding("utf8"); + child.stderr.setEncoding("utf8"); + child.stdout.on("data", (chunk) => { stdout += chunk; }); + child.stderr.on("data", (chunk) => { stderr += chunk; }); + const baseUrl = `http://127.0.0.1:${port}`; + + return { + baseUrl, + token, + hostToken, + getStdout: () => stdout, + getStderr: () => stderr, + async close() { + if (child.exitCode !== null || child.signalCode !== null) return; + child.kill("SIGTERM"); + await Promise.race([ + new Promise((resolveExit) => child.once("exit", resolveExit)), + new Promise((resolveTimeout) => setTimeout(resolveTimeout, 2500)), + ]); + if (child.exitCode === null && child.signalCode === null) child.kill("SIGKILL"); + }, + }; +} + +async function waitForServerHealthy(baseUrl) { + const startedAt = Date.now(); + let lastError = ""; + while (Date.now() - startedAt < 30_000) { + try { + const response = await fetch(`${baseUrl}/health`, { signal: AbortSignal.timeout(2500) }); + if (response.ok) return response.json(); + lastError = `${response.status} ${response.statusText}`; + } catch (error) { + lastError = error instanceof Error ? 
error.message : String(error); + } + await new Promise((resolvePoll) => setTimeout(resolvePoll, 250)); + } + throw new Error(`Timed out waiting for OpenWork server health: ${lastError}`); +} + +await rm(outDir, { recursive: true, force: true }); +await mkdir(outDir, { recursive: true }); +const envDir = await mkdtemp(join(tmpdir(), "openwork-managed-voice-e2e-")); +const mockBroker = await startMockBroker(); +const port = await findFreePort(); +const server = await startOpenWorkServer({ + directory, + port, + env: { + OPENWORK_ENV_STORE: join(envDir, "env.json"), + OPENWORK_TOKEN_STORE: join(envDir, "tokens.json"), + OPENWORK_API_KEY: "ow_inf_e2e", + OPENWORK_INFERENCE_BASE_URL: mockBroker.baseUrl, + }, +}); + +try { + await step("server health", async () => waitForServerHealthy(server.baseUrl)); + + const owner = await step("owner token", async () => { + const response = await fetch(`${server.baseUrl}/tokens`, { + method: "POST", + headers: { "x-openwork-host-token": server.hostToken, "content-type": "application/json" }, + body: JSON.stringify({ scope: "owner", label: "managed voice e2e" }), + }); + assert.equal(response.status, 201); + const body = await response.json(); + assert.equal(typeof body.token, "string"); + return body; + }); + + const session = await step("managed voice session", async () => { + const response = await fetch(`${server.baseUrl}/voice/realtime/session`, { + method: "POST", + headers: { authorization: `Bearer ${owner.token}`, "content-type": "application/json" }, + body: JSON.stringify({}), + }); + assert.equal(response.status, 200); + const body = await response.json(); + assert.equal(body.ok, true); + assert.equal(body.clientSecret, "managed-e2e-client-secret"); + assert.equal(body.source, "openwork-models"); + return body; + }); + + await step("broker received authenticated request", async () => { + assert.equal(mockBroker.requests.length, 1); + assert.equal(mockBroker.requests[0].authorization, "Bearer ow_inf_e2e"); + 
assert.equal(session.model, "gpt-realtime-2"); + return mockBroker.requests[0]; + }); + + await renderIndex(); + console.log(JSON.stringify({ ...results, proof: join(outDir, "index.html") }, null, 2)); +} catch (error) { + const message = error instanceof Error ? error.message : String(error); + results.ok = false; + results.error = message; + results.stderr = server.getStderr(); + results.stdout = server.getStdout?.() ?? ""; + await renderIndex(); + console.error(JSON.stringify({ ...results, proof: join(outDir, "index.html") }, null, 2)); + process.exitCode = 1; +} finally { + await Promise.allSettled([server.close(), mockBroker.close()]); /* settle both shutdowns so a rejected close() can neither skip the other nor replace the error this run is reporting */ + await rm(envDir, { recursive: true, force: true }); /* remove the mkdtemp() scratch directory that holds env.json and tokens.json for the spawned server */ +} diff --git a/apps/app/src/app/lib/openwork-server.ts b/apps/app/src/app/lib/openwork-server.ts index 5ab36f7d4..edf04b635 100644 --- a/apps/app/src/app/lib/openwork-server.ts +++ b/apps/app/src/app/lib/openwork-server.ts @@ -1805,6 +1805,7 @@ export function createOpenworkServerClient(options: { baseUrl: string; token?: s model: string; transcriptionModel: string; tools: string[]; + source?: string; }>(baseUrl, "/voice/realtime/session", { token, hostToken, diff --git a/apps/app/src/react-app/domains/connections/provider-auth/store.ts b/apps/app/src/react-app/domains/connections/provider-auth/store.ts index e2c8aee85..93177fc50 100644 --- a/apps/app/src/react-app/domains/connections/provider-auth/store.ts +++ b/apps/app/src/react-app/domains/connections/provider-auth/store.ts @@ -353,6 +353,27 @@ export function createProviderAuthStore(options: CreateProviderAuthStoreOptions) return next; }; + const readCloudProviderBaseUrl = (provider: DenOrgLlmProviderConnection) => { + const options = provider.providerConfig.options; + if (options && typeof options === "object" && !Array.isArray(options)) { + const baseURL = "baseURL" in options ?
options.baseURL : undefined; + if (typeof baseURL === "string" && baseURL.trim()) return baseURL.trim().replace(/\/api\/v1\/?$/, ""); + } + const api = provider.providerConfig.api; + if (typeof api === "string" && api.trim()) return api.trim().replace(/\/api\/v1\/?$/, ""); + return ""; + }; + + const mirrorOpenWorkModelsVoiceEnv = async (provider: DenOrgLlmProviderConnection, apiKey: string) => { + if (provider.source !== "openwork" || !apiKey.trim()) return; + const openworkClient = options.openworkServer.getSnapshot().openworkServerClient; + if (!openworkClient) return; + const baseUrl = readCloudProviderBaseUrl(provider); + const entries = [{ key: "OPENWORK_API_KEY", value: apiKey.trim() }]; + if (baseUrl) entries.push({ key: "OPENWORK_INFERENCE_BASE_URL", value: baseUrl }); + await openworkClient.upsertUserEnv(entries); + }; + const readWorkspaceOpenworkConfigRecord = async (): Promise< Record > => { @@ -1395,6 +1416,7 @@ export function createProviderAuthStore(options: CreateProviderAuthStoreOptions) providerID: localProviderId, auth: { type: "api", key: apiKey }, }); + await mirrorOpenWorkModelsVoiceEnv(provider, apiKey); } if (existingImported?.providerId && existingImported.providerId !== localProviderId) { try { diff --git a/apps/server/src/env-routes.e2e.test.ts b/apps/server/src/env-routes.e2e.test.ts index 86b838b62..f8d09b88e 100644 --- a/apps/server/src/env-routes.e2e.test.ts +++ b/apps/server/src/env-routes.e2e.test.ts @@ -17,6 +17,8 @@ const dirs: string[] = []; const priorEnvStore = process.env.OPENWORK_ENV_STORE; const priorTokenStore = process.env.OPENWORK_TOKEN_STORE; const priorOpenAiApiKey = process.env.OPENAI_API_KEY; +const priorOpenWorkApiKey = process.env.OPENWORK_API_KEY; +const priorOpenWorkInferenceBaseUrl = process.env.OPENWORK_INFERENCE_BASE_URL; const nativeFetch = globalThis.fetch; function baseConfig(): ServerConfig { @@ -82,6 +84,16 @@ afterEach(async () => { } else { process.env.OPENAI_API_KEY = priorOpenAiApiKey; } + if 
(priorOpenWorkApiKey === undefined) { + delete process.env.OPENWORK_API_KEY; + } else { + process.env.OPENWORK_API_KEY = priorOpenWorkApiKey; + } + if (priorOpenWorkInferenceBaseUrl === undefined) { + delete process.env.OPENWORK_INFERENCE_BASE_URL; + } else { + process.env.OPENWORK_INFERENCE_BASE_URL = priorOpenWorkInferenceBaseUrl; + } globalThis.fetch = nativeFetch; }); @@ -355,6 +367,57 @@ describe("env routes", () => { }); }); + test("voice realtime session prefers OpenWork Models broker when configured", async () => { + process.env.OPENWORK_API_KEY = "ow_inf_test"; + process.env.OPENWORK_INFERENCE_BASE_URL = "https://inference.example.test"; + process.env.OPENAI_API_KEY = "sk-should-not-be-used"; + const { base } = await boot(); + + globalThis.fetch = ((input, init) => { + const url = String(input); + if (url === "https://inference.example.test/voice/realtime/session") { + expect(init?.headers).toMatchObject({ Authorization: "Bearer ow_inf_test" }); + return Promise.resolve(new Response(JSON.stringify({ + ok: true, + clientSecret: "managed-rt-secret", + expiresAt: 456, + model: "gpt-realtime-2", + transcriptionModel: "gpt-4o-transcribe", + tools: ["openwork_snapshot"], + source: "openwork-models", + }), { + status: 200, + headers: { "content-type": "application/json" }, + })); + } + if (url === "https://api.openai.com/v1/realtime/client_secrets") { + return Promise.resolve(new Response("direct OpenAI should not be called", { status: 500 })); + } + return nativeFetch(input, init); + }) as typeof fetch; + + const issued = await fetch(`${base}/tokens`, { + method: "POST", + headers: hostAuth(), + body: JSON.stringify({ scope: "owner", label: "managed voice owner" }), + }); + const tokenBody = (await issued.json()) as { token: string }; + + const response = await fetch(`${base}/voice/realtime/session`, { + method: "POST", + headers: { authorization: `Bearer ${tokenBody.token}`, "content-type": "application/json" }, + body: JSON.stringify({}), + }); + + 
expect(response.status).toBe(200); + expect(await response.json()).toMatchObject({ + ok: true, + clientSecret: "managed-rt-secret", + expiresAt: 456, + source: "openwork-models", + }); + }); + test("values persist across server restart", async () => { const first = await boot(); await fetch(`${first.base}/env`, { diff --git a/apps/server/src/server.ts b/apps/server/src/server.ts index 8c5c77b67..7e4c66859 100644 --- a/apps/server/src/server.ts +++ b/apps/server/src/server.ts @@ -277,6 +277,26 @@ async function resolveOpenAiRealtimeApiKey(env: EnvService): Promise { ""; } +async function resolveOpenWorkModelsVoiceConfig(env: EnvService): Promise<{ baseUrl: string; apiKey: string } | null> { + const records = await env.list(); + const apiKey = + records.find((entry) => entry.key === "OPENWORK_API_KEY")?.value.trim() || + records.find((entry) => entry.key === "OPENWORK_MODELS_API_KEY")?.value.trim() || + process.env.OPENWORK_API_KEY?.trim() || + process.env.OPENWORK_MODELS_API_KEY?.trim() || + ""; + if (!apiKey) return null; + + const baseUrl = + records.find((entry) => entry.key === "OPENWORK_INFERENCE_BASE_URL")?.value.trim() || + records.find((entry) => entry.key === "OPENWORK_MODELS_BASE_URL")?.value.trim() || + process.env.OPENWORK_INFERENCE_BASE_URL?.trim() || + process.env.OPENWORK_MODELS_BASE_URL?.trim() || + ""; + if (!baseUrl) return null; + return { apiKey, baseUrl: baseUrl.replace(/\/+$/, "") }; +} + function openworkVoiceRealtimeInstructions() { return `# Role and Objective @@ -323,6 +343,34 @@ function readOpenAiClientSecret(payload: unknown): { clientSecret: string; expir } async function createOpenAiRealtimeVoiceSession(env: EnvService, input: unknown) { + const managedVoice = await resolveOpenWorkModelsVoiceConfig(env); + if (managedVoice) { + const response = await fetch(`${managedVoice.baseUrl}/voice/realtime/session`, { + method: "POST", + headers: { + Authorization: `Bearer ${managedVoice.apiKey}`, + "Content-Type": "application/json", + }, + 
body: JSON.stringify(input ?? {}), + }); + const text = await response.text(); + let payload: unknown = null; + try { + payload = text ? JSON.parse(text) : null; + } catch { + payload = null; + } + if (!response.ok) { + const errorPayload = isRecord(payload) && isRecord(payload.error) ? payload.error : null; + const message = typeof errorPayload?.message === "string" ? errorPayload.message : response.statusText; + throw new ApiError(response.status, "openwork_models_voice_failed", message || "OpenWork Models could not create a voice session"); + } + if (!isRecord(payload) || payload.ok !== true || typeof payload.clientSecret !== "string") { + throw new ApiError(502, "openwork_models_voice_invalid_response", "OpenWork Models did not return a usable Realtime client secret"); + } + return payload; + } + const apiKey = await resolveOpenAiRealtimeApiKey(env); if (!apiKey) { throw new ApiError( diff --git a/ee/apps/inference/src/app.ts b/ee/apps/inference/src/app.ts index 62e4908ba..6caa2ffef 100644 --- a/ee/apps/inference/src/app.ts +++ b/ee/apps/inference/src/app.ts @@ -8,6 +8,7 @@ import { logger } from "hono/logger"; import { z } from "zod"; import { env } from "./env.js"; import { registerProxyRoutes } from "./proxy.js"; +import { registerVoiceRoutes } from "./voice.js"; import { registerWebhookRoutes } from "./webhooks.js"; const srcDir = path.dirname(fileURLToPath(import.meta.url)); @@ -65,6 +66,7 @@ if (shouldServeLocalModelCatalog) { } registerProxyRoutes(app); +registerVoiceRoutes(app); registerWebhookRoutes(app); app.onError((error, c) => { diff --git a/ee/apps/inference/src/env.ts b/ee/apps/inference/src/env.ts index e7aca8ebe..66aa5ac3e 100644 --- a/ee/apps/inference/src/env.ts +++ b/ee/apps/inference/src/env.ts @@ -14,6 +14,8 @@ const EnvSchema = z DEN_DB_ENCRYPTION_KEY: z.string().trim().min(32), INFERENCE_PROXY_BASE_URL: z.string().optional(), OPENROUTER_UPSTREAM_URL: z.string().optional(), + OPENAI_REALTIME_API_KEY: z.string().optional(), + 
OPENAI_API_KEY: z.string().optional(), INFERENCE_ADMIN_TOKEN: z.string().optional(), INFERENCE_WEBHOOK_SECRET: z.string().optional(), INFERENCE_CREDITS_PER_DOLLAR: z.string().optional(), @@ -120,6 +122,7 @@ export const env = { openRouterUpstreamUrl: normalizeUrl( parsed.OPENROUTER_UPSTREAM_URL ?? "https://openrouter.ai/api/v1", ), + openAiRealtimeApiKey: optionalString(parsed.OPENAI_REALTIME_API_KEY) ?? optionalString(parsed.OPENAI_API_KEY), adminToken: optionalString(parsed.INFERENCE_ADMIN_TOKEN), webhookSecret: optionalString(parsed.INFERENCE_WEBHOOK_SECRET), creditsPerDollar: parseCreditsPerDollar(parsed.INFERENCE_CREDITS_PER_DOLLAR), diff --git a/ee/apps/inference/src/voice.ts b/ee/apps/inference/src/voice.ts new file mode 100644 index 000000000..d6adf715b --- /dev/null +++ b/ee/apps/inference/src/voice.ts @@ -0,0 +1,192 @@ +import { createHash } from "node:crypto" +import type { Hono } from "hono" +import { env } from "./env.js" +import { findActiveInferenceKey } from "./keys.js" +import { ensureUsableBuckets } from "./limits.js" + +const OPENWORK_VOICE_REALTIME_MODEL = "gpt-realtime-2" +const OPENWORK_VOICE_TRANSCRIPTION_MODEL = "gpt-4o-transcribe" + +const OPENWORK_VOICE_REALTIME_TOOLS = [ + { + type: "function", + name: "openwork_snapshot", + description: "Read the current OpenWork UI control snapshot: route, status, narration, and visible action metadata.", + parameters: { type: "object", properties: {}, additionalProperties: false }, + }, + { + type: "function", + name: "openwork_list_actions", + description: "List semantic OpenWork UI actions. Call this before openwork_execute_action when you do not know the exact action id.", + parameters: { type: "object", properties: {}, additionalProperties: false }, + }, + { + type: "function", + name: "openwork_execute_action", + description: "Execute a semantic OpenWork UI action by id. 
Prefer this over screen coordinates or DOM guessing.", + parameters: { + type: "object", + properties: { + actionId: { type: "string", description: "The action id from openwork_list_actions, such as composer.set_text or composer.send." }, + args: { type: "object", description: "Optional JSON arguments for the action.", additionalProperties: true }, + }, + required: ["actionId"], + additionalProperties: false, + }, + }, +] + +function readApiKey(request: Request) { + const auth = request.headers.get("authorization") + if (auth?.toLowerCase().startsWith("bearer ")) { + return auth.slice(7).trim() + } + return request.headers.get("x-api-key")?.trim() ?? null +} + +function buildRequestId() { + return createHash("sha256").update(`${Date.now()}:${Math.random()}`).digest("hex").slice(0, 32) +} + +function isRecord(value: unknown): value is Record<string, unknown> { + return typeof value === "object" && value !== null && !Array.isArray(value) +} + +function readStringField(value: unknown, key: string) { + if (!isRecord(value)) return "" + const field = value[key] + return typeof field === "string" ? field.trim() : "" +} + +function readOpenAiClientSecret(payload: unknown): { clientSecret: string; expiresAt: number | null } { + if (!isRecord(payload)) return { clientSecret: "", expiresAt: null } + const clientSecret = payload.client_secret + if (typeof clientSecret === "string") return { clientSecret, expiresAt: null } + if (isRecord(clientSecret)) { + const value = typeof clientSecret.value === "string" ? clientSecret.value : "" + const expiresAt = typeof clientSecret.expires_at === "number" ? clientSecret.expires_at : null + return { clientSecret: value, expiresAt } + } + const value = typeof payload.value === "string" ? payload.value : "" + return { clientSecret: value, expiresAt: null } +} + +function openworkVoiceRealtimeInstructions() { + return `# Role and Objective + +You are OpenWork Voice Mode, a voice-first control layer inside OpenWork.
+Help the user control OpenWork by using the semantic OpenWork UI tools. + +# Tool Policy + +- Prefer openwork_snapshot, openwork_list_actions, and openwork_execute_action over visual guessing. +- If the user asks to write or draft something, use composer.set_text. +- If the user asks to send or run the current prompt, use composer.send. +- For navigation, settings, session, transcript, and composer work, inspect the action list first if the action id is unknown. +- Do not claim an action completed until the tool succeeds. +- Ask for confirmation before destructive actions such as deleting a session. + +# Voice Style + +- Be concise, calm, and direct. +- If audio is unclear, ask the user to repeat it instead of guessing. +- Ignore background speech that is not addressed to OpenWork. +- Summarize tool results briefly and offer the next useful step.` +} + +async function createOpenAiRealtimeClientSecret(input: unknown, openworkRequestId: string) { + if (!env.openAiRealtimeApiKey) { + return Response.json({ error: { message: "Managed voice is not configured.", type: "invalid_request_error", code: "openai_realtime_key_missing" } }, { status: 503 }) + } + + const model = readStringField(input, "model") || OPENWORK_VOICE_REALTIME_MODEL + const response = await fetch("https://api.openai.com/v1/realtime/client_secrets", { + method: "POST", + headers: { + Authorization: `Bearer ${env.openAiRealtimeApiKey}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + session: { + type: "realtime", + model, + output_modalities: ["audio"], + audio: { + input: { + transcription: { model: OPENWORK_VOICE_TRANSCRIPTION_MODEL, language: "en" }, + turn_detection: { + type: "server_vad", + threshold: 0.58, + silence_duration_ms: 320, + prefix_padding_ms: 300, + create_response: true, + interrupt_response: true, + }, + }, + }, + instructions: openworkVoiceRealtimeInstructions(), + tool_choice: "auto", + tools: OPENWORK_VOICE_REALTIME_TOOLS, + }, + }), + }) + + const text = 
await response.text() + let payload: unknown = null + try { + payload = text ? JSON.parse(text) : null + } catch { + payload = null + } + + if (!response.ok) { + const errorPayload = isRecord(payload) && isRecord(payload.error) ? payload.error : null + const message = typeof errorPayload?.message === "string" ? errorPayload.message : response.statusText + return Response.json({ error: { message: message || "Failed to create OpenAI Realtime session", type: "api_error", code: "openai_realtime_failed" } }, { status: response.status }) + } + + const { clientSecret, expiresAt } = readOpenAiClientSecret(payload) + if (!clientSecret) { + return Response.json({ error: { message: "OpenAI did not return a usable Realtime client secret.", type: "api_error", code: "openai_realtime_invalid_response" } }, { status: 502 }) + } + + return Response.json({ + ok: true, + clientSecret, + expiresAt, + model, + transcriptionModel: OPENWORK_VOICE_TRANSCRIPTION_MODEL, + tools: OPENWORK_VOICE_REALTIME_TOOLS.map((tool) => tool.name), + source: "openwork-models", + openworkRequestId, + }) +} + +export function registerVoiceRoutes(app: Hono) { + app.post("/voice/realtime/session", async (c) => { + const rawKey = readApiKey(c.req.raw) + if (!rawKey) { + return c.json({ error: { message: "Missing OpenWork inference API key.", type: "authentication_error", code: "missing_api_key" } }, 401) + } + + const inferenceKey = await findActiveInferenceKey(rawKey) + if (!inferenceKey) { + return c.json({ error: { message: "Invalid OpenWork inference API key.", type: "authentication_error", code: "invalid_api_key" } }, 401) + } + + const limits = await ensureUsableBuckets(inferenceKey.organization_id) + if (!limits.ok) { + return c.json({ error: { message: `Rate limit reached for organization ${inferenceKey.organization_id}.`, type: "tokens", code: "rate_limit_exceeded" } }, 429) + } + + const openworkRequestId = buildRequestId() + let body: unknown = {} + try { + body = await c.req.json() + } catch { + 
body = {} + } + + return createOpenAiRealtimeClientSecret(body, openworkRequestId) + }) +} diff --git a/evals/results/managed-voice-pr-proof/01-server-health.html b/evals/results/managed-voice-pr-proof/01-server-health.html new file mode 100644 index 000000000..520fa0511 --- /dev/null +++ b/evals/results/managed-voice-pr-proof/01-server-health.html @@ -0,0 +1,20 @@ + + + + + + server health + + +

server health

{
+  "ok": true,
+  "version": "0.17.1",
+  "opencodeVersion": "1.17.3",
+  "uptimeMs": 207
+}
+ \ No newline at end of file diff --git a/evals/results/managed-voice-pr-proof/02-owner-token.html b/evals/results/managed-voice-pr-proof/02-owner-token.html new file mode 100644 index 000000000..ad0395899 --- /dev/null +++ b/evals/results/managed-voice-pr-proof/02-owner-token.html @@ -0,0 +1,21 @@ + + + + + + owner token + + +

owner token

{
+  "id": "4af562f6-1472-4c54-b275-4ce25d70f568",
+  "token": "[redacted]",
+  "scope": "owner",
+  "createdAt": 1781986782006,
+  "label": "managed voice e2e"
+}
+ \ No newline at end of file diff --git a/evals/results/managed-voice-pr-proof/03-managed-voice-session.html b/evals/results/managed-voice-pr-proof/03-managed-voice-session.html new file mode 100644 index 000000000..a1c4f3fdd --- /dev/null +++ b/evals/results/managed-voice-pr-proof/03-managed-voice-session.html @@ -0,0 +1,27 @@ + + + + + + managed voice session + + +

managed voice session

{
+  "ok": true,
+  "clientSecret": "[redacted]",
+  "expiresAt": 987654321,
+  "model": "gpt-realtime-2",
+  "transcriptionModel": "gpt-4o-transcribe",
+  "tools": [
+    "openwork_snapshot",
+    "openwork_list_actions",
+    "openwork_execute_action"
+  ],
+  "source": "openwork-models"
+}
+ \ No newline at end of file diff --git a/evals/results/managed-voice-pr-proof/04-broker-received-authenticated-request.html b/evals/results/managed-voice-pr-proof/04-broker-received-authenticated-request.html new file mode 100644 index 000000000..70b979307 --- /dev/null +++ b/evals/results/managed-voice-pr-proof/04-broker-received-authenticated-request.html @@ -0,0 +1,20 @@ + + + + + + broker received authenticated request + + +

broker received authenticated request

{
+  "method": "POST",
+  "url": "/voice/realtime/session",
+  "authorization": "[redacted]",
+  "body": "{}"
+}
+ \ No newline at end of file diff --git a/evals/results/managed-voice-pr-proof/index.html b/evals/results/managed-voice-pr-proof/index.html new file mode 100644 index 000000000..2aa159921 --- /dev/null +++ b/evals/results/managed-voice-pr-proof/index.html @@ -0,0 +1,45 @@ + + + + + + Managed Voice E2E Proof + + +
+

Managed Voice E2E Proof

+
Result: passed · Output: /Users/benjaminshafii/openwork-enterprise/_repos/openwork-managed-voice/evals/results/managed-voice-pr-proof
+ +
+

server health

+ +

Open frame

+
+ +
+

owner token

+ +

Open frame

+
+ +
+

managed voice session

+ +

Open frame

+
+ +
+

broker received authenticated request

+ +

Open frame

+
+
+ \ No newline at end of file