Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
acb02f9
feat(thunk): add .parsed property to ComputedModelOutputThunk for str…
planetf1 Jun 17, 2026
7370b83
feat(types): thread format= overloads for cast-free structured output
planetf1 Jun 17, 2026
0f77a9f
fix(thunk): propagate _format through __copy__/__deepcopy__; add Rais…
planetf1 Jun 17, 2026
83295e9
nit(thunk): soften parsed docstring example comment
planetf1 Jun 17, 2026
8e4af00
docs(types): annotate type: ignore sites with rationale
planetf1 Jun 17, 2026
0c3498a
fix(thunk): wire _format in HF chat post_processing; add missing tests
planetf1 Jun 22, 2026
4857a32
nit(test): explain subclass-loss caveat in copy/deepcopy _format tests
planetf1 Jun 22, 2026
5aa6921
fix(types): make ComputedModelOutputThunk.parsed generic over the for…
planetf1 Jun 22, 2026
6118cea
fix(types): use concrete _format type and non-string cast for pyright…
planetf1 Jun 22, 2026
87b4b2f
docs(thunk): tighten .parsed and ComputedModelOutputThunk.value docst…
planetf1 Jun 22, 2026
740293f
docs(types): expand genstub return-value ignore rationale
planetf1 Jun 22, 2026
cb2bbf1
test(types): assert attribute-level narrowing for act format= overload
planetf1 Jun 22, 2026
d055522
docs(types): explain intentional Any widening on act implementation
planetf1 Jun 22, 2026
91b9f7c
docs(types): document why format passthrough wrappers keep the ignore
planetf1 Jun 22, 2026
8c478e2
docs(types): note runtime/type mismatch on act format= overloads
planetf1 Jun 22, 2026
d853445
fix(types): branch on format is None in m_serve example to remove typ…
planetf1 Jun 22, 2026
c56d89d
fix(types): replace type: ignore[return-value] with cast in genstub
planetf1 Jun 22, 2026
c054f1c
fix(types): widen instruct/ainstruct format param to type[Any] in imp…
planetf1 Jun 23, 2026
353b227
feat(thunk+types): add .parsed property and wire format= overloads
planetf1 Jun 23, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 13 additions & 8 deletions docs/examples/m_serve/m_serve_example_response_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,18 @@ def serve(
"""
message = input[-1].get_text_content() or "No message provided"

# When format is provided (from json_schema response_format),
# pass it to instruct() to get structured output
result = session.instruct(
description=message,
requirements=requirements, # type: ignore
model_options=model_options,
format=format, # This enables structured output validation
)
if format is None:
result = session.instruct(
description=message,
requirements=requirements, # type: ignore
model_options=model_options,
)
else:
result = session.instruct(
description=message,
requirements=requirements, # type: ignore
model_options=model_options,
format=format,
)

return result
2 changes: 2 additions & 0 deletions mellea/backends/huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -1378,6 +1378,7 @@ class used during generation, if any.
generate_log.result = mot

mot._generate_log = generate_log
mot._format = _format

async def _generate_from_raw(
self,
Expand Down Expand Up @@ -1513,6 +1514,7 @@ async def _generate_from_raw(
generate_log.action = action

result._generate_log = generate_log
result._format = format
results.append(result)

usage: dict[str, Any] | None = (
Expand Down
1 change: 1 addition & 0 deletions mellea/backends/litellm.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,7 @@ async def post_processing(
generate_log.action = mot._action
generate_log.result = mot
mot._generate_log = generate_log
mot._format = _format

# Extract token usage from full response dict or streaming usage
full_response = mot._meta.get("litellm_full_response")
Expand Down
2 changes: 2 additions & 0 deletions mellea/backends/ollama.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,7 @@ async def _generate_from_raw(
generate_log.extra["error"] = error
generate_log.extra["empty_response"] = response.model_dump()
result._generate_log = generate_log
result._format = format

results.append(result)

Expand Down Expand Up @@ -742,6 +743,7 @@ async def post_processing(
generate_log.result = mot

mot._generate_log = generate_log
mot._format = _format
mot._generate = None

# Extract token counts from response
Expand Down
1 change: 1 addition & 0 deletions mellea/backends/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -1127,6 +1127,7 @@ async def post_processing(
generate_log.action = mot._action
generate_log.result = mot
mot._generate_log = generate_log
mot._format = _format

# Extract token usage from response or streaming usage
response = mot._meta["oai_chat_response"]
Expand Down
1 change: 1 addition & 0 deletions mellea/backends/watsonx.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,7 @@ async def post_processing(
generate_log.result = mot
generate_log.action = mot._action
mot._generate_log = generate_log
mot._format = _format

async def _generate_from_raw(
self,
Expand Down
60 changes: 58 additions & 2 deletions mellea/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,11 @@
ParamSpec,
Protocol,
TypeVar,
cast,
runtime_checkable,
)

import pydantic
import typing_extensions
from PIL import Image as PILImage

Expand Down Expand Up @@ -401,6 +403,7 @@ def __init__(
# Mellea-side hook correlation ID; distinct from the provider-assigned
# `GenerationMetadata.response_id`.
self._generation_id: str | None = None
self._format: type[pydantic.BaseModel] | None = None

def _record_ttfb(self) -> None:
"""Record time-to-first-byte if streaming and not yet recorded."""
Expand Down Expand Up @@ -542,6 +545,7 @@ def _copy_from(self, other: ModelOutputThunk) -> None:
self._thinking = other._thinking
self.generation = other.generation
self._generate_log = other._generate_log
self._format = other._format
self._cancelled = other._cancelled
# _cancel_hook is deliberately not copied: _copy_from swaps output state,
# not backend-thread plumbing, which is tied to the original computation.
Expand All @@ -557,7 +561,13 @@ def is_computed(self) -> bool:

@property
def value(self) -> str | None:
"""Gets the value of the block."""
"""Gets the raw string value of the block.

When ``format=`` is set on the originating ``act()``/``instruct()`` call, the
model returns a JSON string and ``.value`` contains that raw JSON — not a
Pydantic instance. Use ``.parsed`` on a ``ComputedModelOutputThunk`` to get
the validated model object.
"""
if not self._computed:
return None
return self._underlying_value
Expand Down Expand Up @@ -776,6 +786,7 @@ def __copy__(self) -> ModelOutputThunk:
copied._action = self._action
copied._context = self._context
copied._generate_log = self._generate_log
copied._format = self._format
copied._model_options = self._model_options
copied.generation = copy(self.generation)
return copied
Expand Down Expand Up @@ -810,6 +821,7 @@ def __deepcopy__(self, memo: dict) -> ModelOutputThunk:
self._context
) # The items in a context should be immutable.
deepcopied._generate_log = copy(self._generate_log)
deepcopied._format = self._format
deepcopied._model_options = copy(self._model_options)
deepcopied.generation = deepcopy(self.generation)
return deepcopied
Expand Down Expand Up @@ -873,14 +885,58 @@ async def astream(self) -> str:

@property
def value(self) -> str:
"""Gets the value of the block."""
"""Gets the raw string value of the block.

When ``format=`` is set on the originating ``act()``/``instruct()`` call, the
model returns a JSON string and ``.value`` contains that raw JSON — not a
Pydantic instance. Use ``.parsed`` to get the validated model object.
"""
return self._underlying_value # type: ignore

@value.setter
def value(self, v: str):
"""Sets the raw string value of the block.

The string is stored as-is as the underlying value; it is not validated
against any ``format=`` type here. ``.parsed`` reads this value when it is
accessed.

Args:
v: The raw string to store.
"""
self._underlying_value = v

@property
def parsed(self) -> S | None:
"""Returns the result as a validated Pydantic instance when ``format=`` was set.

The return type tracks the format type supplied at the call site.
Passing ``format=MyModel`` to ``act()`` or ``instruct()`` yields a
``ComputedModelOutputThunk[MyModel]`` whose ``.parsed`` is typed
``MyModel | None`` — no explicit ``cast()`` required::

result, _ = session.act(action, format=MyModel)
obj = result.parsed # typed MyModel | None

Returns ``None`` when no ``format=`` type was provided. Unlike
``parsed_repr`` (which holds the action-specific parse result),
``.parsed`` re-validates the raw JSON string in ``.value`` against
``_format`` via ``model_validate_json`` on every access. The validated
instance is not cached, so bind it to a local variable if it is needed
more than once.

Note:
This property relies on the originating backend storing the format
type on the thunk. Custom backend authors must set ``mot._format``
in their ``post_processing`` method (mirroring the built-in
backends); otherwise ``.parsed`` always returns ``None`` even when
``format=`` was supplied. The built-in backends also set ``_format``
on the results they build in ``_generate_from_raw``.

Returns:
An instance of the format type (``S``) produced by
``model_validate_json``, or ``None`` if no format type was set.

Raises:
pydantic.ValidationError: If the raw JSON value does not conform to
the format model (e.g. the model returned malformed structured output).
Because validation happens inside this property, the error is raised
when ``.parsed`` is accessed.
"""
# No format type recorded on this thunk: there is nothing to validate against.
if self._format is None:
return None
# `_format` is always a pydantic model type; `model_validate_json` returns
# `pydantic.BaseModel` statically, but the caller's type parameter `S` is
# the concrete model when `format=` was used, so we cast the result to `S`.
# `cast` only informs the type checker; it performs no runtime check.
return cast(S, self._format.model_validate_json(self.value))

def is_computed(self) -> Literal[True]:
"""Returns `True` since thunk is always computed.

Expand Down
27 changes: 22 additions & 5 deletions mellea/stdlib/components/genstub.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,16 @@
from collections.abc import Awaitable, Callable, Coroutine
from copy import deepcopy
from dataclasses import dataclass, fields
from typing import Any, Generic, ParamSpec, TypedDict, TypeVar, get_type_hints, overload
from typing import (
Any,
Generic,
ParamSpec,
TypedDict,
TypeVar,
cast,
get_type_hints,
overload,
)

from pydantic import BaseModel, Field, create_model

Expand Down Expand Up @@ -653,10 +662,15 @@ def __call__(self, *args, **kwargs) -> tuple[R, Context] | R:
)

assert response.parsed_repr is not None
# GenerativeStub._parse calls model_validate_json and returns the unwrapped R,
# so parsed_repr is R at runtime. The thunk types it as S | None (where
# S = FunctionResponse[R]) because the overloads narrow S to the format type,
# not to R. cast makes the coercion explicit rather than suppressing it.
parsed = cast("R", response.parsed_repr)
if context is None:
return response.parsed_repr
return parsed
else:
return response.parsed_repr, context
return parsed, context


class AsyncGenerativeStub(GenerativeStub, Generic[P, R]):
Expand Down Expand Up @@ -796,10 +810,13 @@ async def __async_call__() -> tuple[R, Context] | R:
"unexpectedly received uncomputed model output thunk in async generative stub"
)
assert response.parsed_repr is not None
# Same as SyncGenerativeStub: _parse returns the unwrapped R at runtime;
# cast makes the S → R coercion explicit.
parsed = cast("R", response.parsed_repr)
if context is None:
return response.parsed_repr
return parsed
else:
return response.parsed_repr, context
return parsed, context

return __async_call__()

Expand Down
11 changes: 10 additions & 1 deletion mellea/stdlib/frameworks/react.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,16 @@ async def react(
assert len(tool_responses) == 1, "multiple tools were called with 'final'"

if format is not None:
step, next_context = await mfuncs.aact(
# `format` is a dynamic `type[BaseModelSubclass] | None` forwarded from
# the caller, which matches no single narrow aact() overload (those key
# off `format=None` vs `format=<type>` as distinct literals). We are
# already inside `if format is not None`, so the value is known non-None
# here, but mypy does not propagate that narrowing into the overload pick.
# The clean fix is for the caller to branch on `format is None` and call
# aact in each branch so each call matches a narrow overload; that is
# not worth the duplication for this single internal call site, so we
# accept the ignore.
step, next_context = await mfuncs.aact( # type: ignore[assignment] # dynamic format from caller
action=ReactThought(),
context=context,
backend=backend,
Expand Down
Loading
Loading