Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/docs/integrations/openai.md
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ Diagnose with:
result = m.instruct("What is 2 + 2?")
print(repr(result.value)) # ''
print(result.generation.usage) # {'completion_tokens': 9, ...}
print(result._thinking) # populated reasoning content, if any
print(result.thinking) # populated reasoning content, if any
```

This affects models that default to thinking mode, most commonly Qwen3 served
Expand All @@ -383,7 +383,7 @@ m = MelleaSession(

Other inference servers expose the same control under different names — check
your runtime's documentation. If you intend to use thinking mode, read the
reasoning trace from `result._thinking` rather than `result.value`.
reasoning trace from `result.thinking` rather than `result.value`.

---

Expand Down
8 changes: 4 additions & 4 deletions mellea/backends/litellm.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,8 +470,8 @@ async def processing(
chunk (litellm.ModelResponse | litellm.ModelResponseStream): A single
response object or streaming chunk from LiteLLM.
"""
if mot._thinking is None:
mot._thinking = ""
if mot.thinking is None:
mot.thinking = ""
if mot._underlying_value is None:
mot._underlying_value = ""

Expand All @@ -490,7 +490,7 @@ async def processing(
if thinking_chunk is None:
thinking_chunk = message.get("reasoning")
if thinking_chunk is not None:
mot._thinking += thinking_chunk
mot.thinking += thinking_chunk

content_chunk = message.content
if content_chunk is not None:
Expand All @@ -509,7 +509,7 @@ async def processing(
if thinking_chunk is None:
thinking_chunk = message_delta.get("reasoning")
if thinking_chunk is not None:
mot._thinking += thinking_chunk
mot.thinking += thinking_chunk

content_chunk = message_delta.content
if content_chunk is not None:
Expand Down
6 changes: 3 additions & 3 deletions mellea/backends/ollama.py
Original file line number Diff line number Diff line change
Expand Up @@ -683,11 +683,11 @@ async def processing(
tools (dict[str, AbstractMelleaTool]): Available tools, keyed by name,
used for extracting tool call requests from the response.
"""
if mot._thinking is None:
mot._thinking = ""
if mot.thinking is None:
mot.thinking = ""
thinking_chunk = chunk.message.thinking
if thinking_chunk is not None:
mot._thinking += thinking_chunk
mot.thinking += thinking_chunk

if mot._underlying_value is None:
mot._underlying_value = ""
Expand Down
8 changes: 4 additions & 4 deletions mellea/backends/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -1002,8 +1002,8 @@ async def processing(
chunk (ChatCompletion | ChatCompletionChunk): A single response object or
streaming delta from the OpenAI API.
"""
if mot._thinking is None:
mot._thinking = ""
if mot.thinking is None:
mot.thinking = ""
if mot._underlying_value is None:
mot._underlying_value = ""

Expand All @@ -1016,7 +1016,7 @@ async def processing(
if thinking_chunk is None:
thinking_chunk = (message.model_extra or {}).get("reasoning")
if thinking_chunk is not None:
mot._thinking += thinking_chunk
mot.thinking += thinking_chunk

content_chunk = message.content
if content_chunk is not None:
Expand All @@ -1041,7 +1041,7 @@ async def processing(
if thinking_chunk is None:
thinking_chunk = (message_delta.model_extra or {}).get("reasoning")
if thinking_chunk is not None:
mot._thinking += thinking_chunk
mot.thinking += thinking_chunk

content_chunk = message_delta.content
if content_chunk is not None:
Expand Down
8 changes: 4 additions & 4 deletions mellea/backends/watsonx.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,8 +484,8 @@ async def processing(self, mot: ModelOutputThunk, chunk: dict):
mot (ModelOutputThunk): The output thunk being populated.
chunk (dict): A single response dict or streaming delta from the WatsonX API.
"""
if mot._thinking is None:
mot._thinking = ""
if mot.thinking is None:
mot.thinking = ""
if mot._underlying_value is None:
mot._underlying_value = ""

Expand All @@ -499,7 +499,7 @@ async def processing(self, mot: ModelOutputThunk, chunk: dict):

thinking_chunk = message.get("reasoning_content", None)
if thinking_chunk is not None:
mot._thinking += thinking_chunk
mot.thinking += thinking_chunk

content_chunk = message.get("content", "")
if content_chunk is not None:
Expand All @@ -515,7 +515,7 @@ async def processing(self, mot: ModelOutputThunk, chunk: dict):

thinking_chunk = message_delta.get("reasoning_content", None)
if thinking_chunk is not None:
mot._thinking += thinking_chunk
mot.thinking += thinking_chunk

content_chunk = message_delta.get("content", None)
if content_chunk is not None:
Expand Down
29 changes: 25 additions & 4 deletions mellea/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import datetime
import enum
import logging
import warnings
from collections.abc import Callable, Coroutine, Iterable, Mapping
from copy import copy, deepcopy
from dataclasses import dataclass
Expand Down Expand Up @@ -394,7 +395,7 @@ def __init__(

# Additional fields that should be standardized across apis.
self.tool_calls = tool_calls
self._thinking: str | None = None
self.thinking: str | None = None
self.generation: GenerationMetadata = GenerationMetadata()
"""Backend execution metadata populated during generation."""

Expand Down Expand Up @@ -594,7 +595,7 @@ def _copy_from(self, other: ModelOutputThunk) -> None:
self._meta = other._meta
self.parsed_repr = other.parsed_repr
self.tool_calls = other.tool_calls
self._thinking = other._thinking
self.thinking = other.thinking
self.generation = other.generation
self._generate_log = other._generate_log
self._cancelled = other._cancelled
Expand All @@ -611,6 +612,26 @@ def is_computed(self) -> bool:
"""
return self._computed

@property
def _thinking(self) -> str | None:
"""Deprecated alias for :attr:`thinking`.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIRC I don't think this is the correct docstring format for the project (Google style)


Returns:
str | None: The model's reasoning/thinking trace.
"""
warnings.warn(
"`ModelOutputThunk._thinking` is deprecated and will be removed in a "
"future minor release. Use `ModelOutputThunk.thinking` instead.",
DeprecationWarning,
stacklevel=2,
)
return self.thinking

@_thinking.setter
def _thinking(self, value: str | None) -> None:

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should have the same warn as read

"""Deprecated write alias for :attr:`thinking`."""
self.thinking = value

@property
def value(self) -> str | None:
"""Gets the value of the block."""
Expand Down Expand Up @@ -829,7 +850,7 @@ def __copy__(self) -> ModelOutputThunk:
# _cancel_hook is not forwarded: a copied MOT is a distinct computation
# and must not share the original's backend thread signal.
copied._cancel_hook = None
copied._thinking = self._thinking
copied.thinking = self.thinking
copied._action = self._action
copied._context = self._context
copied._generate_log = self._generate_log
Expand Down Expand Up @@ -862,7 +883,7 @@ def __deepcopy__(self, memo: dict) -> ModelOutputThunk:
# _cancel_hook is not forwarded: a deepcopied MOT is a distinct computation
# and must not share the original's backend thread signal.
deepcopied._cancel_hook = None
deepcopied._thinking = self._thinking
deepcopied.thinking = self.thinking
deepcopied._action = deepcopy(self._action)
deepcopied._context = copy(
self._context
Expand Down
2 changes: 1 addition & 1 deletion mellea/stdlib/requirements/safety/guardian.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,7 @@ async def validate(
await mot.avalue()

# Prefer explicit thinking if available, else try to split from output text.
trace = getattr(mot, "_thinking", None)
trace = mot.thinking
text = mot.value or ""
if trace is None and "</think>" in text:
parts = text.split("</think>")
Expand Down
22 changes: 11 additions & 11 deletions test/backends/test_litellm_thinking.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Unit tests for LiteLLMBackend mot._thinking population.
"""Unit tests for LiteLLMBackend mot.thinking population.

Covers the vLLM case where the wire key is ``"reasoning"`` instead of
``"reasoning_content"``, and the case where LiteLLM has already normalised
Expand Down Expand Up @@ -75,7 +75,7 @@ async def test_processing_non_streaming_reasoning_content_key(backend: LiteLLMBa
reasoning_value="France has its capital in Paris.",
)
await backend.processing(mot, chunk)
assert mot._thinking == "France has its capital in Paris."
assert mot.thinking == "France has its capital in Paris."
assert mot._underlying_value == "Paris"


Expand All @@ -88,7 +88,7 @@ async def test_processing_non_streaming_reasoning_raw_key(backend: LiteLLMBacken
reasoning_value="France has its capital in Paris.",
)
await backend.processing(mot, chunk)
assert mot._thinking == "France has its capital in Paris."
assert mot.thinking == "France has its capital in Paris."
assert mot._underlying_value == "Paris"


Expand All @@ -109,7 +109,7 @@ async def test_processing_non_streaming_reasoning_content_wins_over_reasoning(
object="chat.completion",
)
await backend.processing(mot, chunk)
assert mot._thinking == "from_reasoning_content"
assert mot.thinking == "from_reasoning_content"


async def test_processing_non_streaming_no_reasoning(backend: LiteLLMBackend):
Expand All @@ -121,7 +121,7 @@ async def test_processing_non_streaming_no_reasoning(backend: LiteLLMBackend):
reasoning_value="should be ignored",
)
await backend.processing(mot, chunk)
assert mot._thinking == ""
assert mot.thinking == ""
assert mot._underlying_value == "Paris"


Expand All @@ -147,7 +147,7 @@ async def test_processing_non_streaming_empty_reasoning_content_does_not_fall_ba
object="chat.completion",
)
await backend.processing(mot, chunk)
assert mot._thinking == ""
assert mot.thinking == ""


# ---------------------------------------------------------------------------
Expand All @@ -163,7 +163,7 @@ async def test_processing_streaming_reasoning_content_key(backend: LiteLLMBacken
content="", reasoning_key="reasoning_content", reasoning_value=text
)
await backend.processing(mot, stream_chunk)
assert mot._thinking == "chunk1 chunk2"
assert mot.thinking == "chunk1 chunk2"


async def test_processing_streaming_reasoning_raw_key(backend: LiteLLMBackend):
Expand All @@ -174,7 +174,7 @@ async def test_processing_streaming_reasoning_raw_key(backend: LiteLLMBackend):
content="", reasoning_key="reasoning", reasoning_value=text
)
await backend.processing(mot, stream_chunk)
assert mot._thinking == "chunk1 chunk2"
assert mot.thinking == "chunk1 chunk2"


async def test_processing_streaming_reasoning_content_wins_over_reasoning(
Expand All @@ -190,7 +190,7 @@ async def test_processing_streaming_reasoning_content_wins_over_reasoning(
id="test", choices=[chunk_choice], created=0, model="openai/qwen3"
)
await backend.processing(mot, stream_chunk)
assert mot._thinking == "from_reasoning_content"
assert mot.thinking == "from_reasoning_content"


async def test_processing_streaming_no_reasoning(backend: LiteLLMBackend):
Expand All @@ -200,7 +200,7 @@ async def test_processing_streaming_no_reasoning(backend: LiteLLMBackend):
content="Paris", reasoning_key="unrelated_key", reasoning_value="ignored"
)
await backend.processing(mot, stream_chunk)
assert mot._thinking == ""
assert mot.thinking == ""
assert mot._underlying_value == "Paris"


Expand All @@ -220,7 +220,7 @@ async def test_processing_streaming_empty_reasoning_content_does_not_fall_back(
id="test", choices=[chunk_choice], created=0, model="openai/qwen3"
)
await backend.processing(mot, stream_chunk)
assert mot._thinking == ""
assert mot.thinking == ""


# ---------------------------------------------------------------------------
Expand Down
12 changes: 12 additions & 0 deletions test/backends/test_ollama_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,18 @@ def test_delta_merge_thinking_concatenated():
assert mot._meta["chat_response"].message.thinking == "step 1 step 2"


@pytest.mark.asyncio
async def test_processing_initializes_and_accumulates_thinking(
    backend: OllamaModelBackend,
):
    """processing() fills thinking and the underlying value on a fresh thunk."""
    # Start from a thunk that has neither a value nor a thinking trace yet.
    thunk = ModelOutputThunk(value=None)
    delta = _make_delta("answer", thinking="step 1")
    no_tools = {}

    await backend.processing(thunk, delta, no_tools)

    # The chunk's thinking text and content must both land on the thunk.
    assert thunk.thinking == "step 1"
    assert thunk._underlying_value == "answer"


# --- timeout wiring ---


Expand Down
14 changes: 7 additions & 7 deletions test/backends/test_openai_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,15 +193,15 @@ def _vllm_chat_completion(reasoning: str, content: str | None) -> ChatCompletion


async def test_processing_captures_vllm_reasoning_field(backend):
"""Non-streaming: mot._thinking captures the raw ``reasoning`` key from vLLM."""
"""Non-streaming: mot.thinking captures the raw ``reasoning`` key from vLLM."""
mot: ModelOutputThunk = ModelOutputThunk(value=None)
chunk = _vllm_chat_completion(reasoning="2 + 2 equals 4.", content="4")
# Sanity check: the SDK object does not expose reasoning_content
assert not hasattr(chunk.choices[0].message, "reasoning_content")

await backend.processing(mot, chunk)

assert mot._thinking == "2 + 2 equals 4."
assert mot.thinking == "2 + 2 equals 4."
assert mot._underlying_value == "4"


Expand All @@ -212,12 +212,12 @@ async def test_processing_vllm_reasoning_with_null_content(backend):

await backend.processing(mot, chunk)

assert mot._thinking == "some thinking"
assert mot.thinking == "some thinking"
assert mot._underlying_value == ""


async def test_processing_streaming_captures_vllm_reasoning_field(backend):
"""Streaming: per-chunk ``reasoning`` deltas accumulate into mot._thinking."""
"""Streaming: per-chunk ``reasoning`` deltas accumulate into mot.thinking."""
mot: ModelOutputThunk = ModelOutputThunk(value=None)
chunk_a = ChatCompletionChunk.model_validate(
{
Expand Down Expand Up @@ -257,7 +257,7 @@ async def test_processing_streaming_captures_vllm_reasoning_field(backend):
await backend.processing(mot, chunk_a)
await backend.processing(mot, chunk_b)

assert mot._thinking == "first second"
assert mot.thinking == "first second"
assert mot._underlying_value == "ans"


Expand Down Expand Up @@ -287,7 +287,7 @@ async def test_processing_reasoning_content_still_used(backend):
mot: ModelOutputThunk = ModelOutputThunk(value=None)
await backend.processing(mot, chunk)

assert mot._thinking == "attribute-style trace"
assert mot.thinking == "attribute-style trace"
assert mot._underlying_value == "answer"


Expand All @@ -311,7 +311,7 @@ async def test_processing_reasoning_content_takes_precedence_over_reasoning(back
mot: ModelOutputThunk = ModelOutputThunk(value=None)
await backend.processing(mot, chunk)

assert mot._thinking == "attr-trace"
assert mot.thinking == "attr-trace"
assert mot._underlying_value == "answer"


Expand Down
Loading
Loading