generative-computing · planetf1 · Jun 17, 2026 · Jun 17, 2026 · Jun 17, 2026 · Jun 17, 2026
@@ -44,13 +44,18 @@ def serve(
     """
     message = input[-1].get_text_content() or "No message provided"
 
-    # When format is provided (from json_schema response_format),
-    # pass it to instruct() to get structured output
-    result = session.instruct(
-        description=message,
-        requirements=requirements,  # type: ignore
-        model_options=model_options,
-        format=format,  # This enables structured output validation
-    )
+    if format is None:
+        result = session.instruct(
+            description=message,
+            requirements=requirements,  # type: ignore
+            model_options=model_options,
+        )
+    else:
+        result = session.instruct(
+            description=message,
+            requirements=requirements,  # type: ignore
+            model_options=model_options,
+            format=format,
+        )
 
     return result
@@ -1378,6 +1378,7 @@ class used during generation, if any.
         generate_log.result = mot
 
         mot._generate_log = generate_log
+        mot._format = _format
 
     async def _generate_from_raw(
         self,
@@ -1513,6 +1514,7 @@ async def _generate_from_raw(
             generate_log.action = action
 
             result._generate_log = generate_log
+            result._format = format
             results.append(result)
 
         usage: dict[str, Any] | None = (

@@ -596,6 +596,7 @@ async def post_processing(
         generate_log.action = mot._action
         generate_log.result = mot
         mot._generate_log = generate_log
+        mot._format = _format
 
         # Extract token usage from full response dict or streaming usage
         full_response = mot._meta.get("litellm_full_response")

@@ -613,6 +613,7 @@ async def _generate_from_raw(
                 generate_log.extra["error"] = error
                 generate_log.extra["empty_response"] = response.model_dump()
             result._generate_log = generate_log
+            result._format = format
 
             results.append(result)
 
@@ -742,6 +743,7 @@ async def post_processing(
         generate_log.result = mot
 
         mot._generate_log = generate_log
+        mot._format = _format
         mot._generate = None
 
         # Extract token counts from response

@@ -1127,6 +1127,7 @@ async def post_processing(
         generate_log.action = mot._action
         generate_log.result = mot
         mot._generate_log = generate_log
+        mot._format = _format
 
         # Extract token usage from response or streaming usage
         response = mot._meta["oai_chat_response"]

@@ -614,6 +614,7 @@ async def post_processing(
         generate_log.result = mot
         generate_log.action = mot._action
         mot._generate_log = generate_log
+        mot._format = _format
 
     async def _generate_from_raw(
         self,

@@ -29,9 +29,11 @@
     ParamSpec,
     Protocol,
     TypeVar,
+    cast,
     runtime_checkable,
 )
 
+import pydantic
 import typing_extensions
 from PIL import Image as PILImage
 
@@ -401,6 +403,7 @@ def __init__(
         # Mellea-side hook correlation ID; distinct from the provider-assigned
         # `GenerationMetadata.response_id`.
         self._generation_id: str | None = None
+        self._format: type[pydantic.BaseModel] | None = None
 
     def _record_ttfb(self) -> None:
         """Record time-to-first-byte if streaming and not yet recorded."""
@@ -542,6 +545,7 @@ def _copy_from(self, other: ModelOutputThunk) -> None:
         self._thinking = other._thinking
         self.generation = other.generation
         self._generate_log = other._generate_log
+        self._format = other._format
         self._cancelled = other._cancelled
         # _cancel_hook is deliberately not copied: _copy_from swaps output state,
         # not backend-thread plumbing, which is tied to the original computation.
@@ -557,7 +561,13 @@ def is_computed(self) -> bool:
 
     @property
     def value(self) -> str | None:
-        """Gets the value of the block."""
+        """Gets the raw string value of the block, or ``None`` until it is computed.
+
+        When ``format=`` is set on the originating ``act()``/``instruct()`` call, the
+        model returns a JSON string and ``.value`` contains that raw JSON — not a
+        Pydantic instance.  Use ``.parsed`` on a ``ComputedModelOutputThunk`` to get
+        the validated model object.
+        """
         if not self._computed:
             return None
         return self._underlying_value
@@ -776,6 +786,7 @@ def __copy__(self) -> ModelOutputThunk:
         copied._action = self._action
         copied._context = self._context
         copied._generate_log = self._generate_log
+        copied._format = self._format
         copied._model_options = self._model_options
         copied.generation = copy(self.generation)
         return copied
@@ -810,6 +821,7 @@ def __deepcopy__(self, memo: dict) -> ModelOutputThunk:
             self._context
         )  # The items in a context should be immutable.
         deepcopied._generate_log = copy(self._generate_log)
+        deepcopied._format = self._format
         deepcopied._model_options = copy(self._model_options)
         deepcopied.generation = deepcopy(self.generation)
         return deepcopied
@@ -873,14 +885,58 @@ async def astream(self) -> str:
 
     @property
     def value(self) -> str:
-        """Gets the value of the block."""
+        """Gets the raw string value of the block.
+
+        When ``format=`` was set on the originating ``act()``/``instruct()`` call,
+        this is the raw JSON string returned by the model, not a Pydantic instance;
+        ``.parsed`` validates it on each access (and is ``None`` without ``format=``).
+        """
         return self._underlying_value  # type: ignore
 
     @value.setter
     def value(self, v: str):
         """Sets the value of the block."""
         self._underlying_value = v
 
+    @property
+    def parsed(self) -> S | None:
+        """Returns the result as a validated Pydantic instance when ``format=`` was set.
+
+        The return type tracks the format type supplied at the call site.
+        Passing ``format=MyModel`` to ``act()`` or ``instruct()`` yields a
+        ``ComputedModelOutputThunk[MyModel]`` whose ``.parsed`` is typed
+        ``MyModel | None`` — no explicit ``cast()`` required::
+
+            result = session.instruct(description=prompt, format=MyModel)
+            obj = result.parsed  # typed MyModel | None
+
+        Returns ``None`` when no ``format=`` type was provided.  Unlike
+        ``parsed_repr`` (which holds the action-specific parse result),
+        ``.parsed`` always re-validates the raw JSON string against ``_format``
+        via ``model_validate_json``.
+
+        Note:
+            This property relies on the originating backend storing the format
+            type on the thunk. Custom backend authors must set ``mot._format``
+            in their ``post_processing`` method (mirroring the built-in
+            backends); otherwise ``.parsed`` always returns ``None`` even when
+            ``format=`` was supplied.
+
+        Returns:
+            An instance of the format type (``S``) produced by
+            ``model_validate_json``, or ``None`` if no format type was set.
+
+        Raises:
+            pydantic.ValidationError: If the raw JSON value does not conform to
+                the format model (e.g. the model returned malformed structured output).
+        """
+        if self._format is None:
+            return None
+        # `_format` is always a pydantic model type; `model_validate_json` returns
+        # `pydantic.BaseModel` statically, but the caller's type parameter `S` is
+        # the concrete model when `format=` was used, so we cast the result to `S`.
+        return cast(S, self._format.model_validate_json(self.value))
+
     def is_computed(self) -> Literal[True]:
         """Returns `True` since thunk is always computed.
 

@@ -6,7 +6,16 @@
 from collections.abc import Awaitable, Callable, Coroutine
 from copy import deepcopy
 from dataclasses import dataclass, fields
-from typing import Any, Generic, ParamSpec, TypedDict, TypeVar, get_type_hints, overload
+from typing import (
+    Any,
+    Generic,
+    ParamSpec,
+    TypedDict,
+    TypeVar,
+    cast,
+    get_type_hints,
+    overload,
+)
 
 from pydantic import BaseModel, Field, create_model
 
@@ -653,10 +662,15 @@ def __call__(self, *args, **kwargs) -> tuple[R, Context] | R:
             )
 
         assert response.parsed_repr is not None
+        # GenerativeStub._parse calls model_validate_json and returns the unwrapped R,
+        # so parsed_repr is R at runtime. The thunk types it as S | None (where
+        # S = FunctionResponse[R]) because the overloads narrow S to the format type,
+        # not to R. cast makes the coercion explicit rather than suppressing it.
+        parsed = cast("R", response.parsed_repr)
         if context is None:
-            return response.parsed_repr
+            return parsed
         else:
-            return response.parsed_repr, context
+            return parsed, context
 
 
 class AsyncGenerativeStub(GenerativeStub, Generic[P, R]):
@@ -796,10 +810,13 @@ async def __async_call__() -> tuple[R, Context] | R:
                 "unexpectedly received uncomputed model output thunk in async generative stub"
             )
             assert response.parsed_repr is not None
+            # Same as SyncGenerativeStub: _parse returns the unwrapped R at runtime;
+            # cast makes the S → R coercion explicit.
+            parsed = cast("R", response.parsed_repr)
             if context is None:
-                return response.parsed_repr
+                return parsed
             else:
-                return response.parsed_repr, context
+                return parsed, context
 
         return __async_call__()
 

@@ -111,7 +111,16 @@ async def react(
             assert len(tool_responses) == 1, "multiple tools were called with 'final'"
 
             if format is not None:
-                step, next_context = await mfuncs.aact(
+                # `format` is a dynamic `type[BaseModelSubclass] | None` forwarded from
+                # the caller, which matches no single narrow aact() overload (those key
+                # off `format=None` vs `format=<type>` as distinct literals). We are
+                # already inside `if format is not None`, so the value is known non-None
+                # here, but mypy does not propagate that narrowing into the overload pick.
+                # The clean fix is for the caller to branch on `format is None` and call
+                # aact in each branch so each call matches a narrow overload; that is
+                # not worth the duplication for this single internal call site, so we
+                # accept the ignore.
+                step, next_context = await mfuncs.aact(  # type: ignore[assignment]  # dynamic format from caller
                     action=ReactThought(),
                     context=context,
                     backend=backend,