diff --git a/packages/keiko-model-gateway/src/index.ts b/packages/keiko-model-gateway/src/index.ts
index 06219546..6a21fbf4 100644
--- a/packages/keiko-model-gateway/src/index.ts
+++ b/packages/keiko-model-gateway/src/index.ts
@@ -129,10 +129,13 @@ export {
 export {
   MAX_SPEECH_AUDIO_BYTES,
   requestTextToSpeech,
+  requestTextToSpeechStream,
   type SpeechResponseFormat,
   type TextToSpeechErrorKind,
   type TextToSpeechOutcome,
   type TextToSpeechRequest,
+  type TextToSpeechStreamOutcome,
+  type TextToSpeechStreamSuccess,
   type TextToSpeechSuccess,
 } from "./text-to-speech-adapter.js";
 
diff --git a/packages/keiko-model-gateway/src/text-to-speech-adapter.test.ts b/packages/keiko-model-gateway/src/text-to-speech-adapter.test.ts
index 58835927..d20fc0b6 100644
--- a/packages/keiko-model-gateway/src/text-to-speech-adapter.test.ts
+++ b/packages/keiko-model-gateway/src/text-to-speech-adapter.test.ts
@@ -1,5 +1,9 @@
 import { describe, expect, it } from "vitest";
-import { MAX_SPEECH_AUDIO_BYTES, requestTextToSpeech } from "./text-to-speech-adapter.js";
+import {
+  MAX_SPEECH_AUDIO_BYTES,
+  requestTextToSpeech,
+  requestTextToSpeechStream,
+} from "./text-to-speech-adapter.js";
 import { OutboundHttpEgressError } from "./http.js";
 
 // A recognizable audio byte marker so a test can assert the adapter returns the provider body verbatim
@@ -331,3 +335,67 @@ describe("requestTextToSpeech", () => {
     expect(seenBody).not.toContain(SECRET_API_KEY);
   });
 });
+
+// Reads `stream` to the end and returns all of its chunks joined into a single byte array.
+async function collect(stream: ReadableStream<Uint8Array>): Promise<Uint8Array> {
+  const source = stream.getReader();
+  const chunks: Uint8Array[] = [];
+  let byteCount = 0;
+  for (let step = await source.read(); !step.done; step = await source.read()) {
+    chunks.push(step.value);
+    byteCount += step.value.length;
+  }
+  const joined = new Uint8Array(byteCount);
+  let cursor = 0;
+  for (const chunk of chunks) {
+    joined.set(chunk, cursor);
+    cursor += chunk.length;
+  }
+  return joined;
+}
+
+describe("requestTextToSpeechStream", () => {
+  it("returns the provider body as a byte stream with the resolved mime type", async () => {
+    const fetchImpl = mockFetch(() => audioResponse(AUDIO_BYTES, "audio/pcm"));
+    const outcome = await requestTextToSpeechStream({
+      endpoint: ENDPOINT,
+      apiKey: SECRET_API_KEY,
+      modelId: "keiko-tts",
+      input: ANSWER,
+      responseFormat: "pcm",
+      fetchImpl,
+    });
+    expect(outcome.ok).toBe(true);
+    if (!outcome.ok) return; // narrows the outcome union for the assertions below
+    expect(outcome.value.mimeType).toBe("audio/pcm");
+    expect(new TextDecoder().decode(await collect(outcome.value.body))).toBe(AUDIO_MARKER);
+  });
+
+  it("maps a provider error status to a coded kind without streaming a body", async () => {
+    const fetchImpl = mockFetch(() => new Response("provider error page", { status: 429 }));
+    const outcome = await requestTextToSpeechStream({
+      endpoint: ENDPOINT,
+      apiKey: SECRET_API_KEY,
+      modelId: "keiko-tts",
+      input: ANSWER,
+      fetchImpl,
+    });
+    expect(outcome).toEqual({ ok: false, kind: "rate-limited" }); // exact shape: no body field
+  });
+
+  it("errors the stream once the audio exceeds the size cap", async () => {
+    const fetchImpl = mockFetch(() => audioResponse(new Uint8Array(100), "audio/pcm"));
+    const outcome = await requestTextToSpeechStream({
+      endpoint: ENDPOINT,
+      apiKey: SECRET_API_KEY,
+      modelId: "keiko-tts",
+      input: ANSWER,
+      responseFormat: "pcm",
+      maxAudioBytes: 10, // far below the 100-byte body above, so the cap must trip
+      fetchImpl,
+    });
+    expect(outcome.ok).toBe(true); // the cap is enforced while reading, not up front
+    if (!outcome.ok) return;
+    await expect(collect(outcome.value.body)).rejects.toThrow();
+  });
+});
diff --git a/packages/keiko-model-gateway/src/text-to-speech-adapter.ts b/packages/keiko-model-gateway/src/text-to-speech-adapter.ts
index 527adc61..67b9f4f8 100644
--- a/packages/keiko-model-gateway/src/text-to-speech-adapter.ts
+++ b/packages/keiko-model-gateway/src/text-to-speech-adapter.ts
@@ -276,3 +276,77 @@ export async function requestTextToSpeech(
   }
   return decodeSuccess(dispatched, built);
 }
+
+export interface TextToSpeechStreamSuccess {
+  // The synthesized audio bytes as they arrive from the provider, capped incrementally so a hostile or
+  // misconfigured endpoint cannot stream an unbounded body. The BFF pipes these straight to the browser
+  // (no whole-clip buffering, no base64 envelope) for start-on-first-chunk playback.
+  readonly body: ReadableStream<Uint8Array>; // errors mid-read once the size cap is exceeded
+  readonly mimeType: string; // content type resolved via resolveMimeType
+}
+
+export type TextToSpeechStreamOutcome = // result of requestTextToSpeechStream
+  | { readonly ok: true; readonly value: TextToSpeechStreamSuccess }
+  | { readonly ok: false; readonly kind: TextToSpeechErrorKind };
+
+// Wraps the provider body in a passthrough that errors once `maxBytes` is exceeded, so the streamed
+// response inherits the same size ceiling as the buffered path without ever holding the whole clip.
+function boundBodyStream(
+  source: ReadableStream<Uint8Array>,
+  maxBytes: number,
+): ReadableStream<Uint8Array> {
+  const reader = source.getReader();
+  let total = 0;
+  return new ReadableStream<Uint8Array>({
+    async pull(controller): Promise<void> {
+      try {
+        const { done, value } = await reader.read();
+        if (done) {
+          controller.close();
+          return;
+        }
+        total += value.byteLength;
+        if (total > maxBytes) {
+          // Surface the cap error first so a failing upstream cancel can neither delay nor mask it.
+          controller.error(new Error("synthesized audio exceeded the size limit"));
+          await reader.cancel().catch(() => undefined);
+          return;
+        }
+        controller.enqueue(value);
+      } catch (error) {
+        controller.error(error);
+      }
+    },
+    // Best-effort upstream cancel; swallow its rejection so it cannot become an unhandled rejection.
+    cancel: (reason): Promise<void> => reader.cancel(reason).catch(() => undefined),
+  });
+}
+
+// Streaming counterpart of requestTextToSpeech. It shares the request building, dispatch, status
+// classification, and size cap with the buffered call, but hands back the provider audio as a bounded
+// byte stream instead of a fully-read clip, so a caller can forward it chunk-by-chunk. The audio is
+// only passed through here, never stored.
+export async function requestTextToSpeechStream(
+  request: TextToSpeechRequest,
+): Promise<TextToSpeechStreamOutcome> {
+  const prepared = buildRequest(request);
+  const response = await dispatch(prepared, request.fetchImpl, request.egress);
+  // A string result is an error kind reported by dispatch itself, not a provider response.
+  if (typeof response === "string") {
+    return { ok: false, kind: response };
+  }
+  if (!response.ok) {
+    const kind = classifyStatus(response.status) ?? "transport";
+    await discardBody(response);
+    return { ok: false, kind };
+  }
+  const providerBody = response.body;
+  if (providerBody === null) {
+    return { ok: false, kind: "empty-audio" };
+  }
+  const value: TextToSpeechStreamSuccess = {
+    body: boundBodyStream(providerBody, prepared.maxAudioBytes),
+    mimeType: resolveMimeType(response, prepared.responseFormat),
+  };
+  return { ok: true, value };
+}
diff --git a/packages/keiko-server/src/deps.ts b/packages/keiko-server/src/deps.ts
index a7bb4259..932c2c8c 100644
--- a/packages/keiko-server/src/deps.ts
+++ b/packages/keiko-server/src/deps.ts
@@ -63,6 +63,7 @@ import type {
   SpeechToTextRequest,
   TextToSpeechOutcome,
   TextToSpeechRequest,
+  TextToSpeechStreamOutcome,
 } from "@oscharko-dev/keiko-model-gateway";
 import {
   createRelationshipStorePort,
@@ -285,6 +286,12 @@ export interface UiHandlerDeps {
   readonly voiceSpeechRequest?:
     | ((request: TextToSpeechRequest) => Promise<TextToSpeechOutcome>)
     | undefined;
+  // Streaming counterpart of voiceSpeechRequest (Issue #1556). Lets the /api/voice/speak/stream route
+  // forward provider PCM chunk-by-chunk in tests without touching global fetch. Production leaves it
+  // undefined and uses requestTextToSpeechStream; raw audio is streamed through, never persisted.
+  readonly voiceSpeechStreamRequest?:
+    | ((request: TextToSpeechRequest) => Promise<TextToSpeechStreamOutcome>)
+    | undefined;
   // Issue #497 (Epic #491) — realtime voice proxied-SDP negotiation seam (ADR-0058 D3/D6). Lets the
   // WebSocket control plane perform the browser↔provider SDP exchange through the provider-neutral
   // realtime adapter without touching global fetch in tests. Production leaves this undefined and
diff --git a/packages/keiko-server/src/routes.ts b/packages/keiko-server/src/routes.ts
index 00d00abe..bc91ee56 100644
--- a/packages/keiko-server/src/routes.ts
+++ b/packages/keiko-server/src/routes.ts
@@ -18,7 +18,11 @@ import {
   handleEvidenceDetail,
 } from "./read-handlers.js";
 import { handleGetWorkspaceState, handlePutWorkspaceState } from "./workspace-state-handlers.js";
-import { handleVoiceSpeak, handleVoiceTranscribe } from "./voice-handlers.js";
+import {
+  handleVoiceSpeak,
+  handleVoiceSpeakStream,
+  handleVoiceTranscribe,
+} from "./voice-handlers.js";
 import { handleVoiceRecapBuild } from "./voice-recap.js";
 import {
   handleCreateRun,
@@ -279,6 +283,7 @@ export const API_ROUTES: readonly RouteDefinition[] = [
   // the visible assistant answer text (inside the JSON + CSRF envelope) and receive synthesized audio
   // as base64; answers VOICE_UNAVAILABLE when no speech-output capability is configured/enabled.
   { method: "POST", pattern: "/api/voice/speak", handler: handleVoiceSpeak },
+  { method: "POST", pattern: "/api/voice/speak/stream", handler: handleVoiceSpeakStream },
   // Issue #504 (Epic #491, ADR-0067) — optional, capability-gated, user-triggered voice session recap.
   // POST the committed transcript text (content-free counts alongside) and derive memory candidates via
   // the EXISTING governed capture path; candidates surface in the existing review queue as "proposed".
diff --git a/packages/keiko-server/src/voice-handlers.speak.test.ts b/packages/keiko-server/src/voice-handlers.speak.test.ts
index 25040b9e..45a2c0d4 100644
--- a/packages/keiko-server/src/voice-handlers.speak.test.ts
+++ b/packages/keiko-server/src/voice-handlers.speak.test.ts
@@ -1,14 +1,15 @@
 import { describe, expect, it } from "vitest";
 import { Readable } from "node:stream";
 import type { IncomingMessage } from "node:http";
-import { handleVoiceSpeak } from "./voice-handlers.js";
+import { handleVoiceSpeak, handleVoiceSpeakStream } from "./voice-handlers.js";
 import { buildRedactor, createRunRegistry, type UiHandlerDeps } from "./index.js";
 import { createInMemoryUiStore } from "./store/index.js";
-import type { RouteContext } from "./routes.js";
+import { STREAMING, type RouteContext, type RouteResult } from "./routes.js";
 import type {
   GatewayConfig,
   TextToSpeechOutcome,
   TextToSpeechRequest,
+  TextToSpeechStreamOutcome,
 } from "@oscharko-dev/keiko-model-gateway";
 
 const PROVIDER_SECRET = "voice-tts-secret-token-1234567890";
@@ -360,3 +361,117 @@ describe("POST /api/voice/speak — provider failure mapping (AC4)", () => {
     expect(serialized).not.toContain(PROVIDER_BASE_URL);
   });
 });
+
+// ServerResponse stand-in that records the handler's writes; write() never signals backpressure.
+class FakeRes {
+  statusCode: number | undefined;
+  headers: Record<string, string> | undefined;
+  readonly chunks: Uint8Array[] = [];
+  ended = false;
+  destroyed = false;
+  on(): this {
+    return this;
+  }
+  writeHead(code: number, sent?: Record<string, string>): this {
+    this.headers = sent;
+    this.statusCode = code;
+    return this;
+  }
+  write(bytes: Uint8Array): boolean {
+    this.chunks.push(bytes);
+    return true;
+  }
+  end(): void {
+    this.ended = true;
+  }
+  destroy(): void {
+    this.destroyed = true;
+  }
+}
+
+// Builds a pull-based stream that hands out `chunks` one per pull and closes when they run out.
+function streamOf(chunks: Uint8Array[]): ReadableStream<Uint8Array> {
+  const pending = chunks[Symbol.iterator]();
+  return new ReadableStream<Uint8Array>({
+    pull(controller): void {
+      const step = pending.next();
+      if (step.done) {
+        controller.close();
+        return;
+      }
+      controller.enqueue(step.value);
+    },
+  });
+}
+
+// Route context whose request body is `body` serialized as JSON and whose response is the fake.
+function streamCtx(body: unknown, res: FakeRes): RouteContext {
+  const payload = Buffer.from(JSON.stringify(body), "utf8");
+  const req = Readable.from([payload]) as IncomingMessage;
+  const url = new URL("http://127.0.0.1/api/voice/speak/stream");
+  // FakeRes only mimics part of ServerResponse, hence the double cast.
+  return { req, res: res as unknown as RouteContext["res"], params: {}, url };
+}
+
+function streamOk(
+  body: ReadableStream<Uint8Array>,
+  mimeType = "audio/pcm", // default matches the raw-PCM format the stream route requests
+): TextToSpeechStreamOutcome {
+  return { ok: true, value: { body, mimeType } }; // success arm of the outcome union
+}
+
+describe("POST /api/voice/speak/stream", () => {
+  it("requests pcm, streams the provider bytes with audio/pcm, and returns STREAMING", async () => {
+    const seen: TextToSpeechRequest[] = [];
+    const res = new FakeRes();
+    const deps = depsWith({
+      config: SPEECH_OUTPUT_CONFIG,
+      configPresent: true,
+      voiceSpeechStreamRequest: (
+        request: TextToSpeechRequest,
+      ): Promise<TextToSpeechStreamOutcome> => {
+        seen.push(request);
+        return Promise.resolve(
+          streamOk(streamOf([new Uint8Array([1, 2, 3]), new Uint8Array([4, 5])])),
+        );
+      },
+    });
+
+    const outcome = await handleVoiceSpeakStream(streamCtx({ text: "spoken answer" }, res), deps);
+    expect(outcome).toBe(STREAMING);
+    expect(res.statusCode).toBe(200);
+    expect(res.headers?.["Content-Type"]).toBe("audio/pcm");
+    expect(Buffer.concat(res.chunks.map((c) => Buffer.from(c)))).toEqual(
+      Buffer.from([1, 2, 3, 4, 5]),
+    );
+    expect(res.ended).toBe(true);
+    // The streaming path requests raw pcm (fastest to first audio).
+    expect(seen[0]?.responseFormat).toBe("pcm");
+    expect(seen[0]?.signal).toBeDefined(); // the disconnect abort signal is forwarded
+  });
+
+  it("returns a coded error RouteResult BEFORE any headers when synthesis fails", async () => {
+    const res = new FakeRes();
+    const deps = depsWith({
+      config: SPEECH_OUTPUT_CONFIG,
+      configPresent: true,
+      voiceSpeechStreamRequest: (): Promise<TextToSpeechStreamOutcome> =>
+        Promise.resolve({ ok: false, kind: "rate-limited" }),
+    });
+    const outcome = await handleVoiceSpeakStream(streamCtx({ text: "spoken answer" }, res), deps);
+    expect(outcome).not.toBe(STREAMING);
+    expect((outcome as RouteResult).status).toBe(429);
+    expect(res.statusCode).toBeUndefined(); // never committed a 200 + audio headers
+    expect(res.ended).toBe(false); // the handler itself wrote and ended nothing
+  });
+
+  it("returns 503 VOICE_UNAVAILABLE for an STT-only deployment (no streaming)", async () => {
+    const res = new FakeRes();
+    const outcome = await handleVoiceSpeakStream(
+      streamCtx({ text: "x" }, res),
+      depsWith({ config: STT_ONLY_CONFIG, configPresent: true }),
+    );
+    expect((outcome as RouteResult).status).toBe(503);
+    expect(res.statusCode).toBeUndefined(); // rejected before any response write
+  });
+});
diff --git a/packages/keiko-server/src/voice-handlers.ts b/packages/keiko-server/src/voice-handlers.ts
index ae9502b1..71254e13 100644
--- a/packages/keiko-server/src/voice-handlers.ts
+++ b/packages/keiko-server/src/voice-handlers.ts
@@ -17,6 +17,7 @@ import type { IncomingMessage } from "node:http";
 import {
   requestSpeechToText,
   requestTextToSpeech,
+  requestTextToSpeechStream,
   resolveVoiceCapability,
   selectSpeechOutputModel,
   selectSpeechToTextModel,
@@ -29,11 +30,12 @@ import {
   type SpeechToTextSuccess,
   type TextToSpeechErrorKind,
   type TextToSpeechRequest,
+  type TextToSpeechStreamOutcome,
   type TextToSpeechSuccess,
   type VoicePersona,
 } from "@oscharko-dev/keiko-model-gateway";
-import type { RouteContext, RouteResult } from "./routes.js";
-import { errorBody } from "./routes.js";
+import type { HandlerOutcome, RouteContext, RouteResult } from "./routes.js";
+import { errorBody, STREAMING } from "./routes.js";
 import type { UiHandlerDeps } from "./deps.js";
 import { currentGatewayConfig, currentGatewayEgressConfig } from "./deps.js";
 import { isVoiceDisabledByPolicy } from "./read-handlers.js";
@@ -604,10 +606,18 @@ function speechResult(value: TextToSpeechSuccess): RouteResult {
   };
 }
 
-export async function handleVoiceSpeak(
+interface ResolvedSpeak {
+  readonly validated: ValidatedSpeech; // the parsed + validated speak request
+  readonly provider: ModelProviderConfig; // provider resolved for the request
+  readonly target: SpeechTarget; // resolved voice target
+}
+
+// Shared front-matter for both speak routes: gate the capability, parse + validate the request, and
+// resolve the provider + voice target. Returns the resolved request, or a RouteResult to return as-is.
+async function resolveSpeakRequest(
   ctx: RouteContext,
   deps: UiHandlerDeps,
-): Promise<RouteResult> {
+): Promise<ResolvedSpeak | RouteResult> {
   const gated = gateSpeechOutput(deps);
   if (isRouteResult(gated)) {
     return gated;
@@ -628,7 +638,103 @@ export async function handleVoiceSpeak(
   if (provider === undefined) {
     return speechUnavailable(deps);
   }
+  return { validated, provider, target };
+}
+
+export async function handleVoiceSpeak(
+  ctx: RouteContext,
+  deps: UiHandlerDeps,
+): Promise<RouteResult> {
+  const resolved = await resolveSpeakRequest(ctx, deps);
+  if (isRouteResult(resolved)) {
+    return resolved;
+  }
   const synthesize = deps.voiceSpeechRequest ?? requestTextToSpeech;
-  const outcome = await synthesize(buildTtsRequest(provider, target, validated, deps));
+  const outcome = await synthesize(
+    buildTtsRequest(resolved.provider, resolved.target, resolved.validated, deps),
+  );
   return outcome.ok ? speechResult(outcome.value) : speechProviderErrorResult(deps, outcome.kind);
 }
+
+// Streaming speak always asks the provider for raw PCM — the quickest format to first audio — and the
+// bytes are relayed to the browser as-is (no base64 JSON envelope) for AudioWorklet playback that
+// starts on the first chunk. The buffered /api/voice/speak route remains the universal fallback.
+const STREAM_SPEECH_FORMAT = "pcm" as const;
+
+// Builds the provider request for the streaming route from the shared buffered-route request.
+function buildStreamTtsRequest(
+  resolved: ResolvedSpeak,
+  deps: UiHandlerDeps,
+  signal: AbortSignal,
+): TextToSpeechRequest {
+  const { provider, target, validated } = resolved;
+  // Spread first so the streaming format and abort signal override whatever the base request set.
+  const base = buildTtsRequest(provider, target, validated, deps);
+  return { ...base, responseFormat: STREAM_SPEECH_FORMAT, signal };
+}
+
+// Returns an AbortController that is aborted as soon as the response emits "close" (the canonical
+// disconnect signal), so a barge-in or navigation stops the provider stream instead of producing
+// audio no one will hear.
+function abortOnResClose(ctx: RouteContext): AbortController {
+  const onDisconnect = new AbortController();
+  const abort = (): void => onDisconnect.abort();
+  ctx.res.on("close", abort);
+  return onDisconnect;
+}
+
+// Pipes the provider audio to the response. A false res.write only means Node's buffer is full, so we
+// pause until "drain" (or "close") instead of killing a slow client. Once 200 + audio headers are sent
+// no JSON error is possible: a mid-stream failure just ends the partial stream (client falls back).
+async function pipeAudioStream(
+  ctx: RouteContext,
+  body: ReadableStream<Uint8Array>,
+  controller: AbortController,
+): Promise<void> {
+  const reader = body.getReader();
+  try {
+    for (;;) {
+      const { done, value } = await reader.read();
+      if (done || controller.signal.aborted || ctx.res.destroyed) {
+        break;
+      }
+      if (!ctx.res.write(value)) {
+        await new Promise<void>((resolve) => {
+          const resume = (): void => {
+            ctx.res.off("drain", resume).off("close", resume);
+            resolve();
+          };
+          ctx.res.once("drain", resume).once("close", resume);
+        });
+      }
+    }
+  } catch {
+    // partial stream — ended in finally
+  } finally {
+    await reader.cancel().catch(() => undefined); // best-effort; the stream may already be done
+    ctx.res.end();
+  }
+}
+
+// Streaming speak route: runs the shared gate/validate/resolve step, asks the provider for a stream,
+// and either returns a coded RouteResult (nothing written yet) or commits 200 and pipes the audio.
+export async function handleVoiceSpeakStream(
+  ctx: RouteContext,
+  deps: UiHandlerDeps,
+): Promise<HandlerOutcome> {
+  const speak = await resolveSpeakRequest(ctx, deps);
+  if (isRouteResult(speak)) {
+    return speak;
+  }
+  const disconnect = abortOnResClose(ctx);
+  const ttsRequest = buildStreamTtsRequest(speak, deps, disconnect.signal);
+  const synthesis = await (deps.voiceSpeechStreamRequest ?? requestTextToSpeechStream)(ttsRequest);
+  if (!synthesis.ok) {
+    return speechProviderErrorResult(deps, synthesis.kind);
+  }
+  const { body, mimeType: reported } = synthesis.value;
+  const contentType = ALLOWED_SPEECH_MIME.has(reported) ? reported : DEFAULT_SPEECH_MIME;
+  ctx.res.writeHead(200, { "Content-Type": contentType, "Cache-Control": "no-store" });
+  await pipeAudioStream(ctx, body, disconnect);
+  return STREAMING;
+}
diff --git a/packages/keiko-ui/public/keiko-playback-worklet.js b/packages/keiko-ui/public/keiko-playback-worklet.js
new file mode 100644
index 00000000..9560c065
--- /dev/null
+++ b/packages/keiko-ui/public/keiko-playback-worklet.js
@@ -0,0 +1,156 @@
+// Keiko gapless PCM playback worklet (Issue #1556). Plays streamed assistant-speech audio sample-
+// accurately on the audio render thread so the buffered whole-clip <audio> path can be replaced by a
+// start-on-first-chunk path with instant barge-in.
+//
+// Hardened beyond a naive demo: a pre-allocated Float32 ring buffer (no per-message array spread, no
+// per-quantum allocation), a configurable prime threshold so playback does not start mid-underrun, and
+// a frames-played counter so the main thread has a precise media position for the interrupt offset.
+//
+// Wire protocol (main thread -> worklet, via port.postMessage):
+//   { type: "config", primeFrames }   set the prime threshold (samples buffered before output starts)
+//   Int16Array                        enqueue mono PCM samples at the AudioContext sample rate
+//   { type: "end" }                   no more samples will be sent; emit "ended" once the buffer drains
+//   null | { type: "flush" }          clear the buffer immediately (barge-in / stop) — sub-frame
+// Worklet -> main thread:
+//   { type: "position", frames }      periodic frames-played report
+//   { type: "ended" }                 the buffer drained after the "end" marker (natural completion)
+//
+// Raw audio is transient render-thread data only; nothing is persisted.
+
+class KeikoPlaybackProcessor extends AudioWorkletProcessor {
+  constructor() {
+    super();
+    this.capacity = 0;
+    this.ring = null; // Float32Array
+    this.head = 0;
+    this.tail = 0;
+    this.size = 0;
+    this.primeFrames = 2400; // ~100ms at 24kHz; overridden by the config message
+    this.primed = false;
+    this.draining = false;
+    this.ended = false;
+    this.framesPlayed = 0;
+    this.sinceReport = 0;
+    this.port.onmessage = (event) => {
+      this.handle(event.data);
+    };
+  }
+
+  // Grows the ring by doubling until `extra` more samples fit, unwrapping queued samples to index 0.
+  ensureCapacity(extra) {
+    const need = this.size + extra;
+    if (this.ring !== null && need <= this.capacity) {
+      return;
+    }
+    let cap = this.capacity === 0 ? 1 << 15 : this.capacity;
+    while (cap < need) {
+      cap *= 2;
+    }
+    const next = new Float32Array(cap);
+    for (let i = 0; i < this.size; i += 1) {
+      next[i] = this.ring[(this.head + i) % this.capacity];
+    }
+    this.ring = next;
+    this.capacity = cap;
+    this.head = 0;
+    this.tail = this.size;
+  }
+
+  // Drops queued audio and the playback flags (flush / barge-in); framesPlayed is left as-is.
+  reset() {
+    this.head = 0;
+    this.tail = 0;
+    this.size = 0;
+    this.primed = false;
+    this.draining = false;
+    this.ended = false;
+  }
+
+  handle(data) {
+    if (data === null || (typeof data === "object" && data.type === "flush")) {
+      this.reset();
+      return;
+    }
+    if (typeof data === "object" && data.type === "config") {
+      if (typeof data.primeFrames === "number" && data.primeFrames >= 0) {
+        this.primeFrames = data.primeFrames;
+      }
+      return;
+    }
+    if (typeof data === "object" && data.type === "end") {
+      this.draining = true;
+      // Force any sub-prime remainder to play out, then complete once drained.
+      this.primed = true;
+      if (this.size === 0) {
+        this.finish();
+      }
+      return;
+    }
+    // Anything else must be an Int16Array of PCM samples. Other payloads are ignored: they would
+    // otherwise throw on `.length` or turn `size` into NaN, which would wedge playback for good.
+    if (!(data instanceof Int16Array) || data.length === 0) {
+      return;
+    }
+    const n = data.length;
+    this.ensureCapacity(n);
+    for (let i = 0; i < n; i += 1) {
+      this.ring[this.tail] = data[i] / 32768;
+      this.tail = (this.tail + 1) % this.capacity;
+    }
+    this.size += n;
+    if (this.size >= this.primeFrames) {
+      this.primed = true;
+    }
+  }
+
+  finish() {
+    if (this.ended) {
+      return;
+    }
+    this.ended = true;
+    this.port.postMessage({ type: "ended" });
+  }
+
+  process(_inputs, outputs) {
+    const channel = outputs[0][0];
+    if (channel === undefined) {
+      return true;
+    }
+    const need = channel.length;
+
+    // Before the prime threshold is reached, output silence (the source node stays alive).
+    if (!this.primed) {
+      channel.fill(0);
+      return true;
+    }
+
+    let i = 0;
+    for (; i < need && this.size > 0; i += 1) {
+      channel[i] = this.ring[this.head];
+      this.head = (this.head + 1) % this.capacity;
+      this.size -= 1;
+    }
+    const produced = i;
+    for (; i < need; i += 1) {
+      channel[i] = 0; // underrun → silence rather than a glitch
+    }
+
+    if (produced > 0) {
+      this.framesPlayed += produced;
+      this.sinceReport += produced;
+      // Report position roughly every ~50ms so the main thread has a fresh media offset.
+      if (this.sinceReport >= 1200) {
+        this.sinceReport = 0;
+        this.port.postMessage({ type: "position", frames: this.framesPlayed });
+      }
+    }
+
+    // Natural completion: the sender marked the end and the buffer has fully drained.
+    if (this.draining && this.size === 0) {
+      this.finish();
+    }
+    return true;
+  }
+}
+
+registerProcessor("keiko-playback", KeikoPlaybackProcessor);
diff --git a/packages/keiko-ui/src/app/components/desktop/hooks/assistant-speech-streaming.test.ts b/packages/keiko-ui/src/app/components/desktop/hooks/assistant-speech-streaming.test.ts
new file mode 100644
index 00000000..e89bcdee
--- /dev/null
+++ b/packages/keiko-ui/src/app/components/desktop/hooks/assistant-speech-streaming.test.ts
@@ -0,0 +1,49 @@
+// Issue #1556 — streamed assistant-speech sink. The PCM byte→sample conversion (the tricky part:
+// little-endian decoding + carrying a sample split across network chunks) is unit-tested directly; the
+// browser sink is verified to be inert without WebAudio (jsdom), where the engine falls back to the
+// buffered path. The streaming wiring itself is exercised through useAssistantSpeech with a fake sink.
+
+import { describe, expect, it } from "vitest";
+import {
+  createBrowserAssistantSpeechStreamingSink,
+  pcmBytesToInt16,
+} from "./assistant-speech-streaming";
+
+// Encodes `samples` as consecutive little-endian signed 16-bit integers (two bytes per sample).
+function leBytes(samples: number[]): Uint8Array {
+  const bytes = new Uint8Array(samples.length * 2);
+  const writer = new DataView(bytes.buffer);
+  samples.forEach((sample, index) => writer.setInt16(index * 2, sample, true));
+  return bytes;
+}
+
+describe("pcmBytesToInt16", () => {
+  it("decodes little-endian PCM16 bytes to Int16 samples", () => {
+    const { samples, leftover } = pcmBytesToInt16(leBytes([1, -1, 0x1234, -32768]), undefined);
+    expect(Array.from(samples)).toEqual([1, -1, 0x1234, -32768]);
+    expect(leftover).toBeUndefined(); // an even byte count leaves nothing to carry
+  });
+
+  it("carries a trailing odd byte forward so a split sample is not corrupted", () => {
+    const full = leBytes([0x1234, 0x55aa]); // 4 bytes
+    // First chunk cuts the second sample in half (3 bytes), second chunk delivers the rest.
+    const a = pcmBytesToInt16(full.slice(0, 3), undefined);
+    expect(Array.from(a.samples)).toEqual([0x1234]);
+    expect(a.leftover).toHaveLength(1);
+    const b = pcmBytesToInt16(full.slice(3), a.leftover); // feed the carried byte back in
+    expect(Array.from(b.samples)).toEqual([0x55aa]);
+    expect(b.leftover).toBeUndefined();
+  });
+
+  it("yields no samples for a single leftover byte and keeps it", () => {
+    const { samples, leftover } = pcmBytesToInt16(new Uint8Array([0x34]), undefined);
+    expect(samples).toHaveLength(0); // one byte is only half a 16-bit sample
+    expect(leftover).toEqual(new Uint8Array([0x34]));
+  });
+});
+
+describe("createBrowserAssistantSpeechStreamingSink", () => {
+  it("returns undefined when WebAudio/AudioWorklet is unavailable (jsdom → buffered fallback)", () => {
+    expect(createBrowserAssistantSpeechStreamingSink()).toBeUndefined(); // no sink without WebAudio
+  });
+});
diff --git a/packages/keiko-ui/src/app/components/desktop/hooks/assistant-speech-streaming.ts b/packages/keiko-ui/src/app/components/desktop/hooks/assistant-speech-streaming.ts
new file mode 100644
index 00000000..eae698f4
--- /dev/null
+++ b/packages/keiko-ui/src/app/components/desktop/hooks/assistant-speech-streaming.ts
@@ -0,0 +1,207 @@
+// Issue #1556 — streamed assistant-speech playback via an AudioWorklet PCM sink. Replaces the buffered
+// whole-clip <audio> path (which cannot start until the entire MP3 is synthesized + transferred) with a
+// start-on-first-chunk path: the BFF streams raw PCM, the bytes are fed to the keiko-playback worklet,
+// and audio begins after a small jitter prime. Barge-in is sub-frame (post `null` to flush the queue),
+// and the worklet reports a precise media position for the interrupt offset.
+//
+// It is an injectable seam (like the dictation recorder and realtime audio sink): production wires a
+// real AudioContext + worklet; the factory returns `undefined` when WebAudio/AudioWorklet is
+// unavailable (e.g. jsdom under test, or an old browser), and the caller falls back to the buffered
+// path. Raw audio is transient render-thread data and is never persisted.
+
+import type { VoicePersona } from "@oscharko-dev/keiko-contracts";
+import { streamAssistantSpeech } from "@/lib/api";
+
+// Callbacks through which a streaming sink reports the playback lifecycle of a single `play` call.
+export interface AssistantSpeechStreamHandlers {
+  // Fired when audible output actually begins (the worklet confirms it produced samples). The browser
+  // sink fires it once per `play`, on the first position report it receives from the worklet.
+  readonly onStart: () => void;
+  // Fired at natural completion (the stream ended and the buffer drained).
+  readonly onEnded: () => void;
+  // Fired on any non-cancellation failure; the written answer stays visible (AC4). The browser sink
+  // also uses it when the stream completes without delivering a single sample.
+  readonly onError: () => void;
+}
+
+// Injectable playback seam for streamed assistant speech (real AudioWorklet in production, a fake
+// under test).
+export interface AssistantSpeechStreamingSink {
+  // Streams + plays `input`. Resolves true when this sink took over playback (the caller must NOT also
+  // run the buffered path); false when the sink could not engage up front — streaming unsupported, or
+  // the request failed / returned no body before any audio started — so the caller falls back. An
+  // abort mid-stream resolves true (engaged) and is treated as a silent cancel, not an error.
+  play(
+    input: { readonly text: string; readonly persona?: VoicePersona },
+    signal: AbortSignal,
+    handlers: AssistantSpeechStreamHandlers,
+  ): Promise<boolean>;
+  // Immediate flush for stop / mute / interrupt (barge-in).
+  stop(): void;
+  // Live media position in ms (frames played ÷ sample rate), or undefined when nothing has played.
+  positionMs(): number | undefined;
+}
+
+// Provider output is 24 kHz mono signed-16-bit PCM (the fastest format to first audio). The worklet
+// plays samples 1:1 at the AudioContext rate, so the context is opened at this same rate and no
+// resampling step is needed.
+const TARGET_SAMPLE_RATE = 24_000;
+// Jitter buffer primed before the first output: one tenth of a second of frames (~100ms, i.e. 2,400
+// frames at 24 kHz), so the first chunks never underrun.
+const PRIME_FRAMES = TARGET_SAMPLE_RATE / 10;
+// Registered processor name and the URL the worklet module is loaded from.
+const WORKLET_NAME = "keiko-playback";
+const WORKLET_URL = "/keiko-playback-worklet.js";
+
+// True only when both WebAudio globals needed for worklet playback exist in this runtime.
+function streamingSupported(): boolean {
+  const hasContext = typeof AudioContext !== "undefined";
+  const hasWorkletNode = typeof AudioWorkletNode !== "undefined";
+  return hasContext && hasWorkletNode;
+}
+
+// Recognizes a DOMException named "AbortError"; every other value (including plain Errors) is not
+// treated as a cancellation.
+function isAbortError(error: unknown): boolean {
+  if (!(error instanceof DOMException)) {
+    return false;
+  }
+  return error.name === "AbortError";
+}
+
+// Decodes little-endian PCM16 bytes into Int16 samples. Network chunks may end mid-sample, so an odd
+// trailing byte is handed back as `leftover`; pass it in with the next chunk and it is prepended
+// before decoding. `leftover` in the result is undefined when the input ended on a sample boundary.
+export function pcmBytesToInt16(
+  chunk: Uint8Array,
+  leftover: Uint8Array | undefined,
+): { samples: Int16Array; leftover: Uint8Array | undefined } {
+  const carried = leftover?.length ?? 0;
+  let source = chunk;
+  if (leftover !== undefined && carried > 0) {
+    // Prepend the carried byte(s) so the interrupted sample is decoded as one unit.
+    source = new Uint8Array(carried + chunk.length);
+    source.set(leftover, 0);
+    source.set(chunk, carried);
+  }
+  const sampleCount = Math.floor(source.length / 2);
+  const evenLength = sampleCount * 2;
+  // Honour byteOffset: `source` may be a view into a larger buffer.
+  const reader = new DataView(source.buffer, source.byteOffset, evenLength);
+  const samples = new Int16Array(sampleCount);
+  for (let index = 0, offset = 0; index < sampleCount; index += 1, offset += 2) {
+    samples[index] = reader.getInt16(offset, true);
+  }
+  return {
+    samples,
+    leftover: evenLength === source.length ? undefined : source.slice(evenLength),
+  };
+}
+
+// Builds the production streaming sink, or returns `undefined` when the runtime lacks
+// AudioContext/AudioWorkletNode so the caller uses the buffered path. The AudioContext and worklet
+// node are created lazily on the first `play` and reused by later calls.
+export function createBrowserAssistantSpeechStreamingSink():
+  | AssistantSpeechStreamingSink
+  | undefined {
+  if (!streamingSupported()) {
+    return undefined;
+  }
+  let context: AudioContext | undefined;
+  let node: AudioWorkletNode | undefined;
+  // In-flight initialisation, shared so overlapping `play` calls never build two AudioContexts.
+  let pendingNode: Promise<AudioWorkletNode> | undefined;
+  let positionFrames = 0;
+
+  // Creates the context, loads the worklet module and connects the node to the destination. A context
+  // whose setup fails is closed before the error propagates, so failed attempts (and the retries that
+  // follow them) do not accumulate open AudioContexts.
+  async function createNode(): Promise<AudioWorkletNode> {
+    const ctx = new AudioContext({ sampleRate: TARGET_SAMPLE_RATE });
+    try {
+      await ctx.audioWorklet.addModule(WORKLET_URL);
+      const workletNode = new AudioWorkletNode(ctx, WORKLET_NAME, {
+        numberOfInputs: 0,
+        numberOfOutputs: 1,
+        outputChannelCount: [1],
+      });
+      workletNode.connect(ctx.destination);
+      context = ctx;
+      node = workletNode;
+      return workletNode;
+    } catch (error) {
+      void ctx.close().catch(() => {
+        // already closed — nothing left to release
+      });
+      throw error;
+    }
+  }
+
+  // Returns the shared node, running initialisation at most once at a time. A failed attempt clears
+  // the pending promise so a later `play` can retry from scratch.
+  async function ensureNode(): Promise<AudioWorkletNode> {
+    if (node !== undefined) {
+      return node;
+    }
+    if (pendingNode === undefined) {
+      pendingNode = createNode().finally(() => {
+        pendingNode = undefined;
+      });
+    }
+    return pendingNode;
+  }
+
+  // Reads PCM chunks from the response body and posts the decoded samples to the worklet until the
+  // stream ends or the turn is aborted. Never rejects: failures are reported through `handlers`.
+  async function pump(
+    workletNode: AudioWorkletNode,
+    body: ReadableStream<Uint8Array>,
+    signal: AbortSignal,
+    handlers: AssistantSpeechStreamHandlers,
+  ): Promise<void> {
+    const reader = body.getReader();
+    let leftover: Uint8Array | undefined;
+    let posted = 0;
+    try {
+      for (;;) {
+        const { done, value } = await reader.read();
+        // Abort takes precedence over everything, including end-of-stream: a cancelled turn must
+        // neither report an error nor post "end" into a queue that `stop()` already flushed.
+        if (signal.aborted) {
+          await reader.cancel();
+          return;
+        }
+        if (done) {
+          break;
+        }
+        const { samples, leftover: rest } = pcmBytesToInt16(value, leftover);
+        leftover = rest;
+        if (samples.length > 0) {
+          posted += samples.length;
+          // Transfer (not copy) the sample buffer to the worklet.
+          workletNode.port.postMessage(samples, [samples.buffer]);
+        }
+      }
+      if (posted === 0) {
+        // A 200 with no audio: degrade to the visible text rather than waiting on a stream that will
+        // never play (the worklet emits no "ended" without samples).
+        handlers.onError();
+      } else {
+        workletNode.port.postMessage({ type: "end" });
+      }
+    } catch {
+      // A read that fails because the turn was aborted is a silent cancel, not an error.
+      if (!signal.aborted) {
+        handlers.onError();
+      }
+    }
+  }
+
+  return {
+    async play(input, signal, handlers): Promise<boolean> {
+      let workletNode: AudioWorkletNode;
+      try {
+        workletNode = await ensureNode();
+      } catch {
+        return false; // worklet/context unavailable → caller falls back to the buffered path
+      }
+      if (signal.aborted) {
+        return true;
+      }
+      positionFrames = 0;
+      let started = false;
+      await context?.resume().catch(() => {
+        // a context that cannot resume still receives data; autoplay policy resolves on the user gesture
+      });
+      workletNode.port.postMessage({ type: "config", primeFrames: PRIME_FRAMES });
+      // Replaces any handler left by a previous turn, so only this turn's callbacks can fire.
+      workletNode.port.onmessage = (event: MessageEvent): void => {
+        const data = event.data as { type?: string; frames?: number };
+        if (data.type === "position") {
+          positionFrames = data.frames ?? positionFrames;
+          // The first position report doubles as the "audio has started" signal.
+          if (!started) {
+            started = true;
+            handlers.onStart();
+          }
+        } else if (data.type === "ended") {
+          handlers.onEnded();
+        }
+      };
+
+      let response: Response;
+      try {
+        response = await streamAssistantSpeech(input, signal);
+      } catch (error) {
+        // Check the signal as well as the error shape: an abort raised with a custom reason does not
+        // surface as a DOMException, yet it is still a cancellation of this turn.
+        if (signal.aborted || isAbortError(error)) {
+          return true; // cancelled mid-flight — do not fall back
+        }
+        // Any up-front failure (provider error, network) before audio has started: fall back to the
+        // buffered path so a turn is never lost and a stubbed/working buffered route still plays.
+        return false;
+      }
+      if (response.body === null) {
+        return false;
+      }
+      // Fire-and-forget: `pump` reports its outcome through `handlers` and never rejects.
+      void pump(workletNode, response.body, signal, handlers);
+      return true;
+    },
+
+    stop(): void {
+      node?.port.postMessage(null); // flush the queue immediately (barge-in)
+      void context?.suspend().catch(() => {
+        // already suspended/closed
+      });
+    },
+
+    positionMs(): number | undefined {
+      if (node === undefined || positionFrames === 0) {
+        return undefined;
+      }
+      return Math.round((positionFrames / TARGET_SAMPLE_RATE) * 1000);
+    },
+  };
+}
diff --git a/packages/keiko-ui/src/app/components/desktop/hooks/useAssistantSpeech.test.ts b/packages/keiko-ui/src/app/components/desktop/hooks/useAssistantSpeech.test.ts
index 5b4e185f..781266c9 100644
--- a/packages/keiko-ui/src/app/components/desktop/hooks/useAssistantSpeech.test.ts
+++ b/packages/keiko-ui/src/app/components/desktop/hooks/useAssistantSpeech.test.ts
@@ -13,6 +13,10 @@ import {
   type AssistantSpeechAudioElement,
   type UseAssistantSpeechOptions,
 } from "./useAssistantSpeech";
+import type {
+  AssistantSpeechStreamHandlers,
+  AssistantSpeechStreamingSink,
+} from "./assistant-speech-streaming";
 
 // Issue #1559 persona routing is proved against the REAL production synthesize path: the BFF client
 // `synthesizeAssistantSpeech` is mocked so the hook's own `makeDefaultSynthesize(persona)` closure runs
@@ -455,3 +459,66 @@ describe("useAssistantSpeech — Issue #1559 persona routing", () => {
     );
   });
 });
+
+// Test double for the streaming sink. `play` records the handlers it was given and resolves with
+// `engage`; `stop` only counts its invocations; `positionMs` always reports undefined. The returned
+// accessors expose the captured handlers and the stop count to the test.
+function makeFakeStreamingSink(engage: boolean): {
+  sink: AssistantSpeechStreamingSink;
+  handlers: () => AssistantSpeechStreamHandlers | undefined;
+  stops: () => number;
+} {
+  let lastHandlers: AssistantSpeechStreamHandlers | undefined;
+  let stopCount = 0;
+  const sink: AssistantSpeechStreamingSink = {
+    play(_input, _signal, handlers): Promise<boolean> {
+      lastHandlers = handlers;
+      return Promise.resolve(engage);
+    },
+    stop(): void {
+      stopCount += 1;
+    },
+    positionMs(): number | undefined {
+      return undefined;
+    },
+  };
+  return {
+    sink,
+    handlers: () => lastHandlers,
+    stops: () => stopCount,
+  };
+}
+
+// Exercises the hook with an injected fake sink: the engaged path, the fallback path, and stop.
+describe("useAssistantSpeech — streamed PCM playback", () => {
+  it("uses the streaming sink when it engages and drives the playback lifecycle (no buffered work)", async () => {
+    const fake = makeFakeStreamingSink(true);
+    const h = harness({ createStreamingSink: () => fake.sink });
+    const { result } = renderHook(() => useAssistantSpeech(h.options));
+    await flush();
+    // Streaming took over — the buffered synthesize + audio element are never touched.
+    expect(h.synthCalls).toHaveLength(0);
+    expect(h.audios).toHaveLength(0);
+    expect(result.current.snapshot.phase).toBe("preparing");
+
+    // The phase only advances when the sink reports through the handlers captured by the fake.
+    act(() => fake.handlers()?.onStart());
+    expect(result.current.snapshot.phase).toBe("speaking");
+    act(() => fake.handlers()?.onEnded());
+    expect(result.current.snapshot.phase).toBe("complete");
+  });
+
+  it("falls back to the buffered path when the streaming sink does not engage", async () => {
+    const fake = makeFakeStreamingSink(false);
+    const h = harness({ createStreamingSink: () => fake.sink });
+    const { result } = renderHook(() => useAssistantSpeech(h.options));
+    // Two flushes: presumably one for the sink's `play` promise to resolve false and one for the
+    // buffered synthesis that is started afterwards — confirm against the hook if this changes.
+    await flush();
+    await flush();
+    expect(h.synthCalls).toHaveLength(1);
+    expect(h.audios).toHaveLength(1);
+    act(() => h.audios[0]?.firePlaying());
+    expect(result.current.snapshot.phase).toBe("speaking");
+  });
+
+  it("flushes the streaming sink on stop (sub-frame barge-in)", async () => {
+    const fake = makeFakeStreamingSink(true);
+    const h = harness({ createStreamingSink: () => fake.sink });
+    const { result } = renderHook(() => useAssistantSpeech(h.options));
+    await flush();
+    act(() => fake.handlers()?.onStart());
+    act(() => result.current.stop());
+    // At least once rather than exactly once: the assertion tolerates additional stop() calls.
+    expect(fake.stops()).toBeGreaterThanOrEqual(1);
+  });
+});
diff --git a/packages/keiko-ui/src/app/components/desktop/hooks/useAssistantSpeech.ts b/packages/keiko-ui/src/app/components/desktop/hooks/useAssistantSpeech.ts
index b0dc5208..4b24ee62 100644
--- a/packages/keiko-ui/src/app/components/desktop/hooks/useAssistantSpeech.ts
+++ b/packages/keiko-ui/src/app/components/desktop/hooks/useAssistantSpeech.ts
@@ -21,6 +21,10 @@ import { ApiError, synthesizeAssistantSpeech, type VoiceSpeechResult } from "@/l
 import type { VoiceTurnManagerEngine } from "./voice-turn-manager";
 import { useVoicePlayback, type VoicePlaybackBinding } from "./useVoicePlayback";
 import type { VoicePlaybackFailureKind } from "./voice-playback-state";
+import {
+  createBrowserAssistantSpeechStreamingSink,
+  type AssistantSpeechStreamingSink,
+} from "./assistant-speech-streaming";
 
 // The minimal audio-element surface the engine drives. `HTMLAudioElement` satisfies it structurally,
 // so production passes `new Audio()`; tests inject a controllable fake without a real media element.
@@ -62,6 +66,10 @@ export interface UseAssistantSpeechOptions {
   readonly createAudio?: (() => AssistantSpeechAudioElement) | undefined;
   readonly createObjectUrl?: ((blob: Blob) => string) | undefined;
   readonly revokeObjectUrl?: ((url: string) => void) | undefined;
+  // Optional streamed-PCM playback sink (AudioWorklet). When it engages, audio starts on the first
+  // chunk and barge-in is sub-frame; when it is unavailable (no WebAudio, e.g. under test) or fails up
+  // front, the engine falls back to the buffered <audio> path below. Tests inject a fake sink.
+  readonly createStreamingSink?: (() => AssistantSpeechStreamingSink | undefined) | undefined;
 }
 
 // Builds the default BFF synthesis seam bound to the current persona. A persona is included in the
@@ -149,12 +157,29 @@ export function useAssistantSpeech(options: UseAssistantSpeechOptions): VoicePla
   const handledRef = useRef<HandledTurn>({ id: undefined, nonce: 0 });
   const [replayNonce, setReplayNonce] = useState(0);
 
+  // Created once: the streamed-PCM sink, or undefined when WebAudio/AudioWorklet is unavailable (e.g.
+  // under test) — in which case the engine always uses the buffered path below.
+  const streamingSinkInitRef = useRef(false);
+  const streamingSinkRef = useRef<AssistantSpeechStreamingSink | undefined>(undefined);
+  if (!streamingSinkInitRef.current) {
+    streamingSinkInitRef.current = true;
+    streamingSinkRef.current = (
+      options.createStreamingSink ?? createBrowserAssistantSpeechStreamingSink
+    )();
+  }
+  // Read the current persona inside the engine effect without making it an effect dependency (mirrors
+  // the synthesizeRef pattern, so a persona change never re-triggers a turn that handledRef already owns).
+  const personaRef = useRef(persona);
+  personaRef.current = persona;
+
   // Releases the audio element, revokes the object URL, and aborts any pending synthesis fetch. Safe to
   // call repeatedly: every reference is cleared and re-checked. This is the single teardown used by the
   // effect cleanup (unmount / message switch) and by the stop / mute / interrupt controls (AC3).
   const teardown = useCallback(() => {
     abortRef.current?.abort();
     abortRef.current = null;
+    // Flush the streamed-PCM queue immediately (sub-frame barge-in) when one is active.
+    streamingSinkRef.current?.stop();
     const audio = audioRef.current;
     if (audio !== null) {
       audio.onplaying = null;
@@ -203,54 +228,98 @@ export function useAssistantSpeech(options: UseAssistantSpeechOptions): VoicePla
     let cancelled = false;
     const controller = new AbortController();
     abortRef.current = controller;
-    const audio = createAudioRef.current();
-    audio.muted = false;
-    audioRef.current = audio;
     pb.prepare();
 
-    Promise.resolve(synthesizeRef.current(text, controller.signal))
-      .then((result) => {
-        // `cancelled` covers an effect re-run / unmount; `controller.signal.aborted` covers a stop /
-        // mute / interrupt control that aborted this turn. Either way a late provider answer must not
-        // start playback on a turn that is already gone.
-        if (cancelled || controller.signal.aborted) {
-          return;
-        }
-        const blob = new Blob([decodeBase64(result.audio)], { type: result.mimeType });
-        const url = createUrlRef.current(blob);
-        urlRef.current = url;
-        audio.src = url;
-        audio.onplaying = (): void => {
-          if (!cancelled) {
-            playbackRef.current.playStarted();
+    // The buffered fallback: synthesize the whole clip, then play it through one HTMLAudioElement.
+    // `cancelled` covers an effect re-run / unmount; `controller.signal.aborted` covers a stop / mute /
+    // interrupt that aborted this turn — either way a late provider answer must not start playback.
+    const runBuffered = (): void => {
+      const audio = createAudioRef.current();
+      audio.muted = false;
+      audioRef.current = audio;
+      Promise.resolve(synthesizeRef.current(text, controller.signal))
+        .then((result) => {
+          if (cancelled || controller.signal.aborted) {
+            return undefined;
           }
-        };
-        audio.onended = (): void => {
-          if (!cancelled) {
-            playbackRef.current.complete();
-            teardown();
+          const blob = new Blob([decodeBase64(result.audio)], { type: result.mimeType });
+          const url = createUrlRef.current(blob);
+          urlRef.current = url;
+          audio.src = url;
+          audio.onplaying = (): void => {
+            if (!cancelled) {
+              playbackRef.current.playStarted();
+            }
+          };
+          audio.onended = (): void => {
+            if (!cancelled) {
+              playbackRef.current.complete();
+              teardown();
+            }
+          };
+          audio.onerror = (): void => {
+            if (!cancelled) {
+              playbackRef.current.fail("internal");
+              teardown();
+            }
+          };
+          return Promise.resolve(audio.play()).catch((error: unknown) => {
+            if (!cancelled && !isAbortError(error)) {
+              playbackRef.current.fail("internal");
+              teardown();
+            }
+          });
+        })
+        .catch((error: unknown) => {
+          if (cancelled || isAbortError(error)) {
+            return;
           }
-        };
-        audio.onerror = (): void => {
-          if (!cancelled) {
-            playbackRef.current.fail("internal");
-            teardown();
+          playbackRef.current.fail(failureFromError(error));
+          teardown();
+        });
+    };
+
+    const sink = streamingSinkRef.current;
+    if (sink === undefined) {
+      runBuffered();
+    } else {
+      // Try the streamed-PCM sink first; if it does not engage (unsupported / failed to start), fall
+      // back to the buffered path so a turn is never silently dropped.
+      void sink
+        .play(
+          { text, ...(personaRef.current !== undefined ? { persona: personaRef.current } : {}) },
+          controller.signal,
+          {
+            onStart: (): void => {
+              if (!cancelled) {
+                playbackRef.current.playStarted();
+              }
+            },
+            onEnded: (): void => {
+              if (!cancelled) {
+                playbackRef.current.complete();
+                teardown();
+              }
+            },
+            onError: (): void => {
+              if (!cancelled && !controller.signal.aborted) {
+                playbackRef.current.fail("internal");
+                teardown();
+              }
+            },
+          },
+        )
+        .then((engaged) => {
+          if (!engaged && !cancelled && !controller.signal.aborted) {
+            runBuffered();
           }
-        };
-        return Promise.resolve(audio.play()).catch((error: unknown) => {
-          if (!cancelled && !isAbortError(error)) {
-            playbackRef.current.fail("internal");
-            teardown();
+        })
+        .catch(() => {
+          if (!cancelled && !controller.signal.aborted) {
+            runBuffered();
           }
         });
-      })
-      .catch((error: unknown) => {
-        if (cancelled || isAbortError(error)) {
-          return;
-        }
-        playbackRef.current.fail(failureFromError(error));
-        teardown();
-      });
+    }
 
     return () => {
       cancelled = true;
diff --git a/packages/keiko-ui/src/lib/api.ts b/packages/keiko-ui/src/lib/api.ts
index 2f12a61e..3f14666f 100644
--- a/packages/keiko-ui/src/lib/api.ts
+++ b/packages/keiko-ui/src/lib/api.ts
@@ -280,6 +280,39 @@ export async function synthesizeAssistantSpeech(
   });
 }
 
+// Streaming synthesis: returns the raw Response so the caller can read `response.body` as PCM chunks
+// (AudioWorklet playback). The same CSRF + JSON-request envelope applies; a non-2xx is turned into an
+// ApiError so the caller can fall back to the buffered route. Abortable — on a stop / mute / barge-in
+// the fetch throws and the caller treats it as a silent cancel.
+//
+// The error envelope is untrusted input: `code` and `message` are only taken from it when they are
+// actually strings, otherwise the generic "INTERNAL" / "HTTP <status>" defaults are kept, so the
+// thrown ApiError never carries an undefined code or message.
+export async function streamAssistantSpeech(
+  input: VoiceSpeechRequest,
+  signal?: AbortSignal,
+): Promise<Response> {
+  const res = await fetch("/api/voice/speak/stream", {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/json",
+      "X-Keiko-CSRF": "1",
+      Accept: "audio/pcm",
+    },
+    body: JSON.stringify(input),
+    ...(signal === undefined ? {} : { signal }),
+  });
+  if (!res.ok) {
+    let code = "INTERNAL";
+    let message = `HTTP ${res.status.toString()}`;
+    try {
+      const envelope = (await res.json()) as {
+        error?: { code?: unknown; message?: unknown } | null;
+      } | null;
+      const detail = envelope?.error;
+      const rawCode: unknown = detail?.code;
+      const rawMessage: unknown = detail?.message;
+      if (typeof rawCode === "string") {
+        code = rawCode;
+      }
+      if (typeof rawMessage === "string") {
+        message = rawMessage;
+      }
+    } catch {
+      // parse failure — keep generic message, never log body
+    }
+    throw new ApiError(code, message, res.status);
+  }
+  return res;
+}
+
 export interface GatewaySetupInput {
   readonly baseUrl?: string | undefined;
   readonly apiKey?: string | undefined;
diff --git a/scripts/root-package-surface.contract.json b/scripts/root-package-surface.contract.json
index 65accfaa..6517652a 100644
--- a/scripts/root-package-surface.contract.json
+++ b/scripts/root-package-surface.contract.json
@@ -193,6 +193,7 @@
     "requestRealtimeNegotiation",
     "requestSpeechToText",
     "requestTextToSpeech",
+    "requestTextToSpeechStream",
     "resolveCostClass",
     "resolveOutboundHttpEgressConfig",
     "resolveRealtimeVoice",
@@ -508,6 +509,8 @@
     "TextToSpeechErrorKind",
     "TextToSpeechOutcome",
     "TextToSpeechRequest",
+    "TextToSpeechStreamOutcome",
+    "TextToSpeechStreamSuccess",
     "TextToSpeechSuccess",
     "TimeoutError",
     "ToolCallCompletedEvent",
@@ -677,6 +680,7 @@
     "requestRealtimeNegotiation",
     "requestSpeechToText",
     "requestTextToSpeech",
+    "requestTextToSpeechStream",
     "resolveCostClass",
     "resolveOutboundHttpEgressConfig",
     "resolveRealtimeVoice",