From 40b97d63d548d402eb5473a861488516be410240 Mon Sep 17 00:00:00 2001
From: Phil Williams <phil.williams@ibm.com>
Date: Mon, 22 Jun 2026 13:53:20 -0400
Subject: [PATCH 1/2] refactor(core): promote ModelOutputThunk.thinking and
 deprecate _thinking alias

Signed-off-by: Phil Williams <phil.williams@ibm.com>
---
 docs/docs/integrations/openai.md              |  4 +--
 mellea/backends/litellm.py                    |  8 ++---
 mellea/backends/ollama.py                     |  6 ++--
 mellea/backends/openai.py                     |  8 ++---
 mellea/backends/watsonx.py                    |  8 ++---
 mellea/core/base.py                           | 29 ++++++++++++++++---
 mellea/stdlib/requirements/safety/guardian.py |  2 +-
 test/backends/test_litellm_thinking.py        | 22 +++++++-------
 test/backends/test_openai_unit.py             | 14 ++++-----
 test/core/test_base.py                        | 13 +++++++++
 10 files changed, 74 insertions(+), 40 deletions(-)

diff --git a/docs/docs/integrations/openai.md b/docs/docs/integrations/openai.md
index b37268c02..a7396fbe4 100644
--- a/docs/docs/integrations/openai.md
+++ b/docs/docs/integrations/openai.md
@@ -357,7 +357,7 @@ Diagnose with:
 result = m.instruct("What is 2 + 2?")
 print(repr(result.value))                    # ''
 print(result.generation.usage)               # {'completion_tokens': 9, ...}
-print(result._thinking)                      # populated reasoning content, if any
+print(result.thinking)                       # populated reasoning content, if any
 ```
 
 This affects models that default to thinking mode, most commonly Qwen3 served
@@ -383,7 +383,7 @@ m = MelleaSession(
 
 Other inference servers expose the same control under different names — check
 your runtime's documentation. If you intend to use thinking mode, read the
-reasoning trace from `result._thinking` rather than `result.value`.
+reasoning trace from `result.thinking` rather than `result.value`.
 
 ---
 
diff --git a/mellea/backends/litellm.py b/mellea/backends/litellm.py
index 0caee3eb0..0f3c24e66 100644
--- a/mellea/backends/litellm.py
+++ b/mellea/backends/litellm.py
@@ -470,8 +470,8 @@ async def processing(
             chunk (litellm.ModelResponse | litellm.ModelResponseStream): A single
                 response object or streaming chunk from LiteLLM.
         """
-        if mot._thinking is None:
-            mot._thinking = ""
+        if mot.thinking is None:
+            mot.thinking = ""
         if mot._underlying_value is None:
             mot._underlying_value = ""
 
@@ -490,7 +490,7 @@ async def processing(
             if thinking_chunk is None:
                 thinking_chunk = message.get("reasoning")
             if thinking_chunk is not None:
-                mot._thinking += thinking_chunk
+                mot.thinking += thinking_chunk
 
             content_chunk = message.content
             if content_chunk is not None:
@@ -509,7 +509,7 @@ async def processing(
             if thinking_chunk is None:
                 thinking_chunk = message_delta.get("reasoning")
             if thinking_chunk is not None:
-                mot._thinking += thinking_chunk
+                mot.thinking += thinking_chunk
 
             content_chunk = message_delta.content
             if content_chunk is not None:
diff --git a/mellea/backends/ollama.py b/mellea/backends/ollama.py
index 0a7c28297..a26bc77f5 100644
--- a/mellea/backends/ollama.py
+++ b/mellea/backends/ollama.py
@@ -683,11 +683,11 @@ async def processing(
             tools (dict[str, AbstractMelleaTool]): Available tools, keyed by name,
                 used for extracting tool call requests from the response.
         """
-        if mot._thinking is None:
-            mot._thinking = ""
+        if mot.thinking is None:
+            mot.thinking = ""
         thinking_chunk = chunk.message.thinking
         if thinking_chunk is not None:
-            mot._thinking += thinking_chunk
+            mot.thinking += thinking_chunk
 
         if mot._underlying_value is None:
             mot._underlying_value = ""
diff --git a/mellea/backends/openai.py b/mellea/backends/openai.py
index 3683a81a3..17839b516 100644
--- a/mellea/backends/openai.py
+++ b/mellea/backends/openai.py
@@ -1002,8 +1002,8 @@ async def processing(
             chunk (ChatCompletion | ChatCompletionChunk): A single response object or
                 streaming delta from the OpenAI API.
         """
-        if mot._thinking is None:
-            mot._thinking = ""
+        if mot.thinking is None:
+            mot.thinking = ""
         if mot._underlying_value is None:
             mot._underlying_value = ""
 
@@ -1016,7 +1016,7 @@ async def processing(
             if thinking_chunk is None:
                 thinking_chunk = (message.model_extra or {}).get("reasoning")
             if thinking_chunk is not None:
-                mot._thinking += thinking_chunk
+                mot.thinking += thinking_chunk
 
             content_chunk = message.content
             if content_chunk is not None:
@@ -1041,7 +1041,7 @@ async def processing(
             if thinking_chunk is None:
                 thinking_chunk = (message_delta.model_extra or {}).get("reasoning")
             if thinking_chunk is not None:
-                mot._thinking += thinking_chunk
+                mot.thinking += thinking_chunk
 
             content_chunk = message_delta.content
             if content_chunk is not None:
diff --git a/mellea/backends/watsonx.py b/mellea/backends/watsonx.py
index 87b42cccb..13d2a28a6 100644
--- a/mellea/backends/watsonx.py
+++ b/mellea/backends/watsonx.py
@@ -484,8 +484,8 @@ async def processing(self, mot: ModelOutputThunk, chunk: dict):
             mot (ModelOutputThunk): The output thunk being populated.
             chunk (dict): A single response dict or streaming delta from the WatsonX API.
         """
-        if mot._thinking is None:
-            mot._thinking = ""
+        if mot.thinking is None:
+            mot.thinking = ""
         if mot._underlying_value is None:
             mot._underlying_value = ""
 
@@ -499,7 +499,7 @@ async def processing(self, mot: ModelOutputThunk, chunk: dict):
 
             thinking_chunk = message.get("reasoning_content", None)
             if thinking_chunk is not None:
-                mot._thinking += thinking_chunk
+                mot.thinking += thinking_chunk
 
             content_chunk = message.get("content", "")
             if content_chunk is not None:
@@ -515,7 +515,7 @@ async def processing(self, mot: ModelOutputThunk, chunk: dict):
 
             thinking_chunk = message_delta.get("reasoning_content", None)
             if thinking_chunk is not None:
-                mot._thinking += thinking_chunk
+                mot.thinking += thinking_chunk
 
             content_chunk = message_delta.get("content", None)
             if content_chunk is not None:
diff --git a/mellea/core/base.py b/mellea/core/base.py
index ff2202559..933dc4e73 100644
--- a/mellea/core/base.py
+++ b/mellea/core/base.py
@@ -18,6 +18,7 @@
 import datetime
 import enum
 import logging
+import warnings
 from collections.abc import Callable, Coroutine, Iterable, Mapping
 from copy import copy, deepcopy
 from dataclasses import dataclass
@@ -394,7 +395,7 @@ def __init__(
 
         # Additional fields that should be standardized across apis.
         self.tool_calls = tool_calls
-        self._thinking: str | None = None
+        self.thinking: str | None = None
         self.generation: GenerationMetadata = GenerationMetadata()
         """Backend execution metadata populated during generation."""
 
@@ -594,7 +595,7 @@ def _copy_from(self, other: ModelOutputThunk) -> None:
         self._meta = other._meta
         self.parsed_repr = other.parsed_repr
         self.tool_calls = other.tool_calls
-        self._thinking = other._thinking
+        self.thinking = other.thinking
         self.generation = other.generation
         self._generate_log = other._generate_log
         self._cancelled = other._cancelled
@@ -611,6 +612,26 @@ def is_computed(self) -> bool:
         """
         return self._computed
 
+    @property
+    def _thinking(self) -> str | None:
+        """Deprecated read alias for :attr:`thinking`; emits a ``DeprecationWarning``.
+
+        Returns:
+            str | None: The model's reasoning/thinking trace.
+        """
+        warnings.warn(
+            "`ModelOutputThunk._thinking` is deprecated and will be removed in a "
+            "future minor release. Use `ModelOutputThunk.thinking` instead.",
+            DeprecationWarning,
+            stacklevel=2,  # attribute the warning to the code that read `_thinking`
+        )
+        return self.thinking
+
+    @_thinking.setter
+    def _thinking(self, value: str | None) -> None:
+        """Deprecated write alias for :attr:`thinking`; emits no warning."""
+        self.thinking = value
+
     @property
     def value(self) -> str | None:
         """Gets the value of the block."""
@@ -829,7 +850,7 @@ def __copy__(self) -> ModelOutputThunk:
         # _cancel_hook is not forwarded: a copied MOT is a distinct computation
         # and must not share the original's backend thread signal.
         copied._cancel_hook = None
-        copied._thinking = self._thinking
+        copied.thinking = self.thinking
         copied._action = self._action
         copied._context = self._context
         copied._generate_log = self._generate_log
@@ -862,7 +883,7 @@ def __deepcopy__(self, memo: dict) -> ModelOutputThunk:
         # _cancel_hook is not forwarded: a deepcopied MOT is a distinct computation
         # and must not share the original's backend thread signal.
         deepcopied._cancel_hook = None
-        deepcopied._thinking = self._thinking
+        deepcopied.thinking = self.thinking
         deepcopied._action = deepcopy(self._action)
         deepcopied._context = copy(
             self._context
diff --git a/mellea/stdlib/requirements/safety/guardian.py b/mellea/stdlib/requirements/safety/guardian.py
index 42c463612..58d15de64 100644
--- a/mellea/stdlib/requirements/safety/guardian.py
+++ b/mellea/stdlib/requirements/safety/guardian.py
@@ -389,7 +389,7 @@ async def validate(
         await mot.avalue()
 
         # Prefer explicit thinking if available, else try to split from output text.
-        trace = getattr(mot, "_thinking", None)
+        trace = mot.thinking
         text = mot.value or ""
         if trace is None and "</think>" in text:
             parts = text.split("</think>")
diff --git a/test/backends/test_litellm_thinking.py b/test/backends/test_litellm_thinking.py
index dfd08fe7e..68b182186 100644
--- a/test/backends/test_litellm_thinking.py
+++ b/test/backends/test_litellm_thinking.py
@@ -1,4 +1,4 @@
-"""Unit tests for LiteLLMBackend mot._thinking population.
+"""Unit tests for LiteLLMBackend mot.thinking population.
 
 Covers the vLLM case where the wire key is ``"reasoning"`` instead of
 ``"reasoning_content"``, and the case where LiteLLM has already normalised
@@ -75,7 +75,7 @@ async def test_processing_non_streaming_reasoning_content_key(backend: LiteLLMBa
         reasoning_value="France has its capital in Paris.",
     )
     await backend.processing(mot, chunk)
-    assert mot._thinking == "France has its capital in Paris."
+    assert mot.thinking == "France has its capital in Paris."
     assert mot._underlying_value == "Paris"
 
 
@@ -88,7 +88,7 @@ async def test_processing_non_streaming_reasoning_raw_key(backend: LiteLLMBacken
         reasoning_value="France has its capital in Paris.",
     )
     await backend.processing(mot, chunk)
-    assert mot._thinking == "France has its capital in Paris."
+    assert mot.thinking == "France has its capital in Paris."
     assert mot._underlying_value == "Paris"
 
 
@@ -109,7 +109,7 @@ async def test_processing_non_streaming_reasoning_content_wins_over_reasoning(
         object="chat.completion",
     )
     await backend.processing(mot, chunk)
-    assert mot._thinking == "from_reasoning_content"
+    assert mot.thinking == "from_reasoning_content"
 
 
 async def test_processing_non_streaming_no_reasoning(backend: LiteLLMBackend):
@@ -121,7 +121,7 @@ async def test_processing_non_streaming_no_reasoning(backend: LiteLLMBackend):
         reasoning_value="should be ignored",
     )
     await backend.processing(mot, chunk)
-    assert mot._thinking == ""
+    assert mot.thinking == ""
     assert mot._underlying_value == "Paris"
 
 
@@ -147,7 +147,7 @@ async def test_processing_non_streaming_empty_reasoning_content_does_not_fall_ba
         object="chat.completion",
     )
     await backend.processing(mot, chunk)
-    assert mot._thinking == ""
+    assert mot.thinking == ""
 
 
 # ---------------------------------------------------------------------------
@@ -163,7 +163,7 @@ async def test_processing_streaming_reasoning_content_key(backend: LiteLLMBacken
             content="", reasoning_key="reasoning_content", reasoning_value=text
         )
         await backend.processing(mot, stream_chunk)
-    assert mot._thinking == "chunk1 chunk2"
+    assert mot.thinking == "chunk1 chunk2"
 
 
 async def test_processing_streaming_reasoning_raw_key(backend: LiteLLMBackend):
@@ -174,7 +174,7 @@ async def test_processing_streaming_reasoning_raw_key(backend: LiteLLMBackend):
             content="", reasoning_key="reasoning", reasoning_value=text
         )
         await backend.processing(mot, stream_chunk)
-    assert mot._thinking == "chunk1 chunk2"
+    assert mot.thinking == "chunk1 chunk2"
 
 
 async def test_processing_streaming_reasoning_content_wins_over_reasoning(
@@ -190,7 +190,7 @@ async def test_processing_streaming_reasoning_content_wins_over_reasoning(
         id="test", choices=[chunk_choice], created=0, model="openai/qwen3"
     )
     await backend.processing(mot, stream_chunk)
-    assert mot._thinking == "from_reasoning_content"
+    assert mot.thinking == "from_reasoning_content"
 
 
 async def test_processing_streaming_no_reasoning(backend: LiteLLMBackend):
@@ -200,7 +200,7 @@ async def test_processing_streaming_no_reasoning(backend: LiteLLMBackend):
         content="Paris", reasoning_key="unrelated_key", reasoning_value="ignored"
     )
     await backend.processing(mot, stream_chunk)
-    assert mot._thinking == ""
+    assert mot.thinking == ""
     assert mot._underlying_value == "Paris"
 
 
@@ -220,7 +220,7 @@ async def test_processing_streaming_empty_reasoning_content_does_not_fall_back(
         id="test", choices=[chunk_choice], created=0, model="openai/qwen3"
     )
     await backend.processing(mot, stream_chunk)
-    assert mot._thinking == ""
+    assert mot.thinking == ""
 
 
 # ---------------------------------------------------------------------------
diff --git a/test/backends/test_openai_unit.py b/test/backends/test_openai_unit.py
index 03bd70df0..7ee7d84ea 100644
--- a/test/backends/test_openai_unit.py
+++ b/test/backends/test_openai_unit.py
@@ -193,7 +193,7 @@ def _vllm_chat_completion(reasoning: str, content: str | None) -> ChatCompletion
 
 
 async def test_processing_captures_vllm_reasoning_field(backend):
-    """Non-streaming: mot._thinking captures the raw ``reasoning`` key from vLLM."""
+    """Non-streaming: mot.thinking captures the raw ``reasoning`` key from vLLM."""
     mot: ModelOutputThunk = ModelOutputThunk(value=None)
     chunk = _vllm_chat_completion(reasoning="2 + 2 equals 4.", content="4")
     # Sanity check: the SDK object does not expose reasoning_content
@@ -201,7 +201,7 @@ async def test_processing_captures_vllm_reasoning_field(backend):
 
     await backend.processing(mot, chunk)
 
-    assert mot._thinking == "2 + 2 equals 4."
+    assert mot.thinking == "2 + 2 equals 4."
     assert mot._underlying_value == "4"
 
 
@@ -212,12 +212,12 @@ async def test_processing_vllm_reasoning_with_null_content(backend):
 
     await backend.processing(mot, chunk)
 
-    assert mot._thinking == "some thinking"
+    assert mot.thinking == "some thinking"
     assert mot._underlying_value == ""
 
 
 async def test_processing_streaming_captures_vllm_reasoning_field(backend):
-    """Streaming: per-chunk ``reasoning`` deltas accumulate into mot._thinking."""
+    """Streaming: per-chunk ``reasoning`` deltas accumulate into mot.thinking."""
     mot: ModelOutputThunk = ModelOutputThunk(value=None)
     chunk_a = ChatCompletionChunk.model_validate(
         {
@@ -257,7 +257,7 @@ async def test_processing_streaming_captures_vllm_reasoning_field(backend):
     await backend.processing(mot, chunk_a)
     await backend.processing(mot, chunk_b)
 
-    assert mot._thinking == "first second"
+    assert mot.thinking == "first second"
     assert mot._underlying_value == "ans"
 
 
@@ -287,7 +287,7 @@ async def test_processing_reasoning_content_still_used(backend):
     mot: ModelOutputThunk = ModelOutputThunk(value=None)
     await backend.processing(mot, chunk)
 
-    assert mot._thinking == "attribute-style trace"
+    assert mot.thinking == "attribute-style trace"
     assert mot._underlying_value == "answer"
 
 
@@ -311,7 +311,7 @@ async def test_processing_reasoning_content_takes_precedence_over_reasoning(back
     mot: ModelOutputThunk = ModelOutputThunk(value=None)
     await backend.processing(mot, chunk)
 
-    assert mot._thinking == "attr-trace"
+    assert mot.thinking == "attr-trace"
     assert mot._underlying_value == "answer"
 
 
diff --git a/test/core/test_base.py b/test/core/test_base.py
index a3424d0f4..89e04ef1b 100644
--- a/test/core/test_base.py
+++ b/test/core/test_base.py
@@ -275,6 +275,19 @@ def test_mot_error_carried_by_copy_methods() -> None:
     assert target.error is err
 
 
+def test_mot_thinking_public_field_round_trip():
+    mot = ModelOutputThunk(value="x")
+    mot.thinking = "reasoning trace"
+    assert mot.thinking == "reasoning trace"
+
+
+def test_mot__thinking_deprecated_alias_warns_on_read():
+    mot = ModelOutputThunk(value="x")
+    mot.thinking = "reasoning trace"  # seeded via the public attribute
+    with pytest.warns(DeprecationWarning, match=r"ModelOutputThunk\._thinking"):
+        assert mot._thinking == "reasoning trace"
+
+
 if __name__ == "__main__":
     pytest.main([__file__])
 

From 837fa073fa393d82995e6ecd978f03b2edfc52e9 Mon Sep 17 00:00:00 2001
From: Phil Williams <phil.williams@ibm.com>
Date: Mon, 22 Jun 2026 14:42:13 -0400
Subject: [PATCH 2/2] test: add coverage for ModelOutputThunk.thinking promotion

Signed-off-by: Phil Williams <phil.williams@ibm.com>
---
 test/backends/test_ollama_unit.py             | 12 +++++++
 test/backends/test_stop_sequences_unit.py     | 35 +++++++++++++++++++
 test/core/test_base.py                        |  6 ++++
 .../requirements/test_guardian_check_unit.py  | 32 +++++++++++++++++
 4 files changed, 85 insertions(+)
 create mode 100644 test/stdlib/requirements/test_guardian_check_unit.py

diff --git a/test/backends/test_ollama_unit.py b/test/backends/test_ollama_unit.py
index 7d0c4b03f..992a7e709 100644
--- a/test/backends/test_ollama_unit.py
+++ b/test/backends/test_ollama_unit.py
@@ -204,6 +204,18 @@ def test_delta_merge_thinking_concatenated():
     assert mot._meta["chat_response"].message.thinking == "step 1 step 2"
 
 
+@pytest.mark.asyncio
+async def test_processing_initializes_and_accumulates_thinking(
+    backend: OllamaModelBackend,
+):
+    """processing() initializes thinking and accumulates it across chunks."""
+    mot = ModelOutputThunk(value=None)
+    await backend.processing(mot, _make_delta("answer", thinking="step 1"), {})
+    await backend.processing(mot, _make_delta("", thinking=" step 2"), {})
+    assert mot.thinking == "step 1 step 2"  # appended, not overwritten
+    assert mot._underlying_value == "answer"
+
+
 # --- timeout wiring ---
 
 
diff --git a/test/backends/test_stop_sequences_unit.py b/test/backends/test_stop_sequences_unit.py
index d4e200f7e..06ee57a89 100644
--- a/test/backends/test_stop_sequences_unit.py
+++ b/test/backends/test_stop_sequences_unit.py
@@ -11,6 +11,7 @@
 from mellea.backends import ModelOption
 from mellea.backends.ollama import OllamaModelBackend
 from mellea.backends.openai import OpenAIBackend
+from mellea.core import ModelOutputThunk
 
 # --- OpenAI ---
 
@@ -127,6 +128,40 @@ def test_watsonx_stop_sequences_round_trip(is_chat, native_key):
     assert ModelOption.STOP_SEQUENCES not in backend_specific
 
 
+@pytest.mark.asyncio
+async def test_watsonx_processing_non_streaming_captures_reasoning_content():
+    backend = _make_watsonx_backend()
+    mot = ModelOutputThunk(value=None)
+
+    chunk = {
+        "choices": [
+            {"message": {"reasoning_content": "trace", "content": "answer content"}}
+        ]
+    }
+    await backend.processing(mot, chunk)
+
+    assert mot.thinking == "trace"
+    assert mot._underlying_value == "answer content"
+    assert mot._meta["oai_chat_response_choice"] == chunk["choices"][0]
+
+
+@pytest.mark.asyncio
+async def test_watsonx_processing_streaming_captures_reasoning_content():
+    backend = _make_watsonx_backend()
+    mot = ModelOutputThunk(value=None)
+
+    await backend.processing(
+        mot, {"choices": [{"delta": {"reasoning_content": "a", "content": "x"}}]}
+    )
+    await backend.processing(
+        mot, {"choices": [{"delta": {"reasoning_content": "b", "content": "y"}}]}
+    )
+
+    assert mot.thinking == "ab"
+    assert mot._underlying_value == "xy"
+    assert len(mot._meta["oai_chat_response_streamed"]) == 2
+
+
 # --- HuggingFace ---
 
 
diff --git a/test/core/test_base.py b/test/core/test_base.py
index 89e04ef1b..99daa5dcc 100644
--- a/test/core/test_base.py
+++ b/test/core/test_base.py
@@ -288,6 +288,12 @@ def test_mot__thinking_deprecated_alias_warns_on_read():
         assert mot._thinking == "reasoning trace"
 
 
+def test_mot__thinking_deprecated_alias_write_sets_public_field():
+    mot = ModelOutputThunk(value="x")
+    mot._thinking = "reasoning trace"
+    assert mot.thinking == "reasoning trace"
+
+
 if __name__ == "__main__":
     pytest.main([__file__])
 
diff --git a/test/stdlib/requirements/test_guardian_check_unit.py b/test/stdlib/requirements/test_guardian_check_unit.py
new file mode 100644
index 000000000..d34e26e5d
--- /dev/null
+++ b/test/stdlib/requirements/test_guardian_check_unit.py
@@ -0,0 +1,32 @@
+"""Unit tests for GuardianCheck requirement behavior."""
+
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from mellea.core import ModelOutputThunk
+from mellea.stdlib.components import Message
+from mellea.stdlib.context import ChatContext
+from mellea.stdlib.requirements.safety.guardian import GuardianCheck
+
+
+@pytest.mark.asyncio
+async def test_guardian_validate_uses_thinking_trace_in_reason() -> None:
+    """validate() should include explicit mot.thinking content in the reason."""
+    mot = ModelOutputThunk(value="<score>no</score>")
+    mot.thinking = "grounded in provided content"
+
+    backend = MagicMock()
+    backend.generate_from_context = AsyncMock(return_value=(mot, ChatContext()))
+
+    with pytest.warns(DeprecationWarning, match="GuardianCheck is deprecated"):
+        req = GuardianCheck(risk="harm", backend=backend, backend_type="ollama")
+
+    ctx = ChatContext().add(Message("user", "Is this safe?")).add(
+        Message("assistant", "Yes.")
+    )
+    result = await req.validate(backend, ctx)
+
+    assert result.as_bool() is True
+    assert result.reason is not None
+    assert "Reasoning: grounded in provided content" in result.reason