generative-computing · exequeryphil · Jun 22, 2026 · Jun 22, 2026 · ajbozarth · Jun 22, 2026
@@ -357,7 +357,7 @@ Diagnose with:
 result = m.instruct("What is 2 + 2?")
 print(repr(result.value))                    # ''
 print(result.generation.usage)               # {'completion_tokens': 9, ...}
-print(result._thinking)                      # populated reasoning content, if any
+print(result.thinking)                       # populated reasoning content, if any
 ```
 
 This affects models that default to thinking mode, most commonly Qwen3 served
@@ -383,7 +383,7 @@ m = MelleaSession(
 
 Other inference servers expose the same control under different names — check
 your runtime's documentation. If you intend to use thinking mode, read the
-reasoning trace from `result._thinking` rather than `result.value`.
+reasoning trace from `result.thinking` rather than `result.value`.
 
 ---
 

@@ -470,8 +470,8 @@ async def processing(
             chunk (litellm.ModelResponse | litellm.ModelResponseStream): A single
                 response object or streaming chunk from LiteLLM.
         """
-        if mot._thinking is None:
-            mot._thinking = ""
+        if mot.thinking is None:
+            mot.thinking = ""
         if mot._underlying_value is None:
             mot._underlying_value = ""
 
@@ -490,7 +490,7 @@ async def processing(
             if thinking_chunk is None:
                 thinking_chunk = message.get("reasoning")
             if thinking_chunk is not None:
-                mot._thinking += thinking_chunk
+                mot.thinking += thinking_chunk
 
             content_chunk = message.content
             if content_chunk is not None:
@@ -509,7 +509,7 @@ async def processing(
             if thinking_chunk is None:
                 thinking_chunk = message_delta.get("reasoning")
             if thinking_chunk is not None:
-                mot._thinking += thinking_chunk
+                mot.thinking += thinking_chunk
 
             content_chunk = message_delta.content
             if content_chunk is not None:

@@ -683,11 +683,11 @@ async def processing(
             tools (dict[str, AbstractMelleaTool]): Available tools, keyed by name,
                 used for extracting tool call requests from the response.
         """
-        if mot._thinking is None:
-            mot._thinking = ""
+        if mot.thinking is None:
+            mot.thinking = ""
         thinking_chunk = chunk.message.thinking
         if thinking_chunk is not None:
-            mot._thinking += thinking_chunk
+            mot.thinking += thinking_chunk
 
         if mot._underlying_value is None:
             mot._underlying_value = ""

@@ -1002,8 +1002,8 @@ async def processing(
             chunk (ChatCompletion | ChatCompletionChunk): A single response object or
                 streaming delta from the OpenAI API.
         """
-        if mot._thinking is None:
-            mot._thinking = ""
+        if mot.thinking is None:
+            mot.thinking = ""
         if mot._underlying_value is None:
             mot._underlying_value = ""
 
@@ -1016,7 +1016,7 @@ async def processing(
             if thinking_chunk is None:
                 thinking_chunk = (message.model_extra or {}).get("reasoning")
             if thinking_chunk is not None:
-                mot._thinking += thinking_chunk
+                mot.thinking += thinking_chunk
 
             content_chunk = message.content
             if content_chunk is not None:
@@ -1041,7 +1041,7 @@ async def processing(
             if thinking_chunk is None:
                 thinking_chunk = (message_delta.model_extra or {}).get("reasoning")
             if thinking_chunk is not None:
-                mot._thinking += thinking_chunk
+                mot.thinking += thinking_chunk
 
             content_chunk = message_delta.content
             if content_chunk is not None:

@@ -484,8 +484,8 @@ async def processing(self, mot: ModelOutputThunk, chunk: dict):
             mot (ModelOutputThunk): The output thunk being populated.
             chunk (dict): A single response dict or streaming delta from the WatsonX API.
         """
-        if mot._thinking is None:
-            mot._thinking = ""
+        if mot.thinking is None:
+            mot.thinking = ""
         if mot._underlying_value is None:
             mot._underlying_value = ""
 
@@ -499,7 +499,7 @@ async def processing(self, mot: ModelOutputThunk, chunk: dict):
 
             thinking_chunk = message.get("reasoning_content", None)
             if thinking_chunk is not None:
-                mot._thinking += thinking_chunk
+                mot.thinking += thinking_chunk
 
             content_chunk = message.get("content", "")
             if content_chunk is not None:
@@ -515,7 +515,7 @@ async def processing(self, mot: ModelOutputThunk, chunk: dict):
 
             thinking_chunk = message_delta.get("reasoning_content", None)
             if thinking_chunk is not None:
-                mot._thinking += thinking_chunk
+                mot.thinking += thinking_chunk
 
             content_chunk = message_delta.get("content", None)
             if content_chunk is not None:

@@ -18,6 +18,7 @@
 import datetime
 import enum
 import logging
+import warnings
 from collections.abc import Callable, Coroutine, Iterable, Mapping
 from copy import copy, deepcopy
 from dataclasses import dataclass
@@ -394,7 +395,7 @@ def __init__(
 
         # Additional fields that should be standardized across apis.
         self.tool_calls = tool_calls
-        self._thinking: str | None = None
+        self.thinking: str | None = None
         self.generation: GenerationMetadata = GenerationMetadata()
         """Backend execution metadata populated during generation."""
 
@@ -594,7 +595,7 @@ def _copy_from(self, other: ModelOutputThunk) -> None:
         self._meta = other._meta
         self.parsed_repr = other.parsed_repr
         self.tool_calls = other.tool_calls
-        self._thinking = other._thinking
+        self.thinking = other.thinking
         self.generation = other.generation
         self._generate_log = other._generate_log
         self._cancelled = other._cancelled
@@ -611,6 +612,26 @@ def is_computed(self) -> bool:
         """
         return self._computed
 
+    @property
+    def _thinking(self) -> str | None:
+        """Deprecated read alias for :attr:`thinking`.
+
+        Emits a :class:`DeprecationWarning` attributed to the caller
+        (``stacklevel=2``) and then returns the value of :attr:`thinking`.
+
+        Returns:
+            str | None: The model's reasoning/thinking trace.
+        """
+        warnings.warn(
+            "`ModelOutputThunk._thinking` is deprecated and will be removed in a "
+            "future minor release. Use `ModelOutputThunk.thinking` instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return self.thinking
+
+    @_thinking.setter
+    def _thinking(self, value: str | None) -> None:
+        """Deprecated write alias for :attr:`thinking`.
+
+        Warns exactly like the getter so that code which only assigns to
+        ``_thinking`` (and never reads it) is also told about the rename,
+        then stores ``value`` on :attr:`thinking`.
+
+        Args:
+            value (str | None): New reasoning/thinking trace to store.
+        """
+        warnings.warn(
+            "`ModelOutputThunk._thinking` is deprecated and will be removed in a "
+            "future minor release. Use `ModelOutputThunk.thinking` instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        self.thinking = value
+
     @property
     def value(self) -> str | None:
         """Gets the value of the block."""
@@ -829,7 +850,7 @@ def __copy__(self) -> ModelOutputThunk:
         # _cancel_hook is not forwarded: a copied MOT is a distinct computation
         # and must not share the original's backend thread signal.
         copied._cancel_hook = None
-        copied._thinking = self._thinking
+        copied.thinking = self.thinking
         copied._action = self._action
         copied._context = self._context
         copied._generate_log = self._generate_log
@@ -862,7 +883,7 @@ def __deepcopy__(self, memo: dict) -> ModelOutputThunk:
         # _cancel_hook is not forwarded: a deepcopied MOT is a distinct computation
         # and must not share the original's backend thread signal.
         deepcopied._cancel_hook = None
-        deepcopied._thinking = self._thinking
+        deepcopied.thinking = self.thinking
         deepcopied._action = deepcopy(self._action)
         deepcopied._context = copy(
             self._context

@@ -389,7 +389,7 @@ async def validate(
         await mot.avalue()
 
         # Prefer explicit thinking if available, else try to split from output text.
-        trace = getattr(mot, "_thinking", None)
+        trace = mot.thinking
         text = mot.value or ""
         if trace is None and "</think>" in text:
             parts = text.split("</think>")

@@ -1,4 +1,4 @@
-"""Unit tests for LiteLLMBackend mot._thinking population.
+"""Unit tests for LiteLLMBackend mot.thinking population.
 
 Covers the vLLM case where the wire key is ``"reasoning"`` instead of
 ``"reasoning_content"``, and the case where LiteLLM has already normalised
@@ -75,7 +75,7 @@ async def test_processing_non_streaming_reasoning_content_key(backend: LiteLLMBa
         reasoning_value="France has its capital in Paris.",
     )
     await backend.processing(mot, chunk)
-    assert mot._thinking == "France has its capital in Paris."
+    assert mot.thinking == "France has its capital in Paris."
     assert mot._underlying_value == "Paris"
 
 
@@ -88,7 +88,7 @@ async def test_processing_non_streaming_reasoning_raw_key(backend: LiteLLMBacken
         reasoning_value="France has its capital in Paris.",
     )
     await backend.processing(mot, chunk)
-    assert mot._thinking == "France has its capital in Paris."
+    assert mot.thinking == "France has its capital in Paris."
     assert mot._underlying_value == "Paris"
 
 
@@ -109,7 +109,7 @@ async def test_processing_non_streaming_reasoning_content_wins_over_reasoning(
         object="chat.completion",
     )
     await backend.processing(mot, chunk)
-    assert mot._thinking == "from_reasoning_content"
+    assert mot.thinking == "from_reasoning_content"
 
 
 async def test_processing_non_streaming_no_reasoning(backend: LiteLLMBackend):
@@ -121,7 +121,7 @@ async def test_processing_non_streaming_no_reasoning(backend: LiteLLMBackend):
         reasoning_value="should be ignored",
     )
     await backend.processing(mot, chunk)
-    assert mot._thinking == ""
+    assert mot.thinking == ""
     assert mot._underlying_value == "Paris"
 
 
@@ -147,7 +147,7 @@ async def test_processing_non_streaming_empty_reasoning_content_does_not_fall_ba
         object="chat.completion",
     )
     await backend.processing(mot, chunk)
-    assert mot._thinking == ""
+    assert mot.thinking == ""
 
 
 # ---------------------------------------------------------------------------
@@ -163,7 +163,7 @@ async def test_processing_streaming_reasoning_content_key(backend: LiteLLMBacken
             content="", reasoning_key="reasoning_content", reasoning_value=text
         )
         await backend.processing(mot, stream_chunk)
-    assert mot._thinking == "chunk1 chunk2"
+    assert mot.thinking == "chunk1 chunk2"
 
 
 async def test_processing_streaming_reasoning_raw_key(backend: LiteLLMBackend):
@@ -174,7 +174,7 @@ async def test_processing_streaming_reasoning_raw_key(backend: LiteLLMBackend):
             content="", reasoning_key="reasoning", reasoning_value=text
         )
         await backend.processing(mot, stream_chunk)
-    assert mot._thinking == "chunk1 chunk2"
+    assert mot.thinking == "chunk1 chunk2"
 
 
 async def test_processing_streaming_reasoning_content_wins_over_reasoning(
@@ -190,7 +190,7 @@ async def test_processing_streaming_reasoning_content_wins_over_reasoning(
         id="test", choices=[chunk_choice], created=0, model="openai/qwen3"
     )
     await backend.processing(mot, stream_chunk)
-    assert mot._thinking == "from_reasoning_content"
+    assert mot.thinking == "from_reasoning_content"
 
 
 async def test_processing_streaming_no_reasoning(backend: LiteLLMBackend):
@@ -200,7 +200,7 @@ async def test_processing_streaming_no_reasoning(backend: LiteLLMBackend):
         content="Paris", reasoning_key="unrelated_key", reasoning_value="ignored"
     )
     await backend.processing(mot, stream_chunk)
-    assert mot._thinking == ""
+    assert mot.thinking == ""
     assert mot._underlying_value == "Paris"
 
 
@@ -220,7 +220,7 @@ async def test_processing_streaming_empty_reasoning_content_does_not_fall_back(
         id="test", choices=[chunk_choice], created=0, model="openai/qwen3"
     )
     await backend.processing(mot, stream_chunk)
-    assert mot._thinking == ""
+    assert mot.thinking == ""
 
 
 # ---------------------------------------------------------------------------

@@ -204,6 +204,18 @@ def test_delta_merge_thinking_concatenated():
     assert mot._meta["chat_response"].message.thinking == "step 1 step 2"
 
 
+@pytest.mark.asyncio
+async def test_processing_initializes_and_accumulates_thinking(
+    backend: OllamaModelBackend,
+):
+    """processing() replaces a None ``thinking`` with the chunk's thinking text."""
+    # A freshly built thunk is used so processing() has to initialize the
+    # thinking / _underlying_value fields itself before appending to them.
+    mot = ModelOutputThunk(value=None)
+    # Only a single chunk is fed (with an empty tools mapping), so the expected
+    # values below are exactly that chunk's thinking and content.
+    await backend.processing(mot, _make_delta("answer", thinking="step 1"), {})
+
+    assert mot.thinking == "step 1"
+    assert mot._underlying_value == "answer"
+
+
 # --- timeout wiring ---
 
 

@@ -193,15 +193,15 @@ def _vllm_chat_completion(reasoning: str, content: str | None) -> ChatCompletion
 
 
 async def test_processing_captures_vllm_reasoning_field(backend):
-    """Non-streaming: mot._thinking captures the raw ``reasoning`` key from vLLM."""
+    """Non-streaming: mot.thinking captures the raw ``reasoning`` key from vLLM."""
     mot: ModelOutputThunk = ModelOutputThunk(value=None)
     chunk = _vllm_chat_completion(reasoning="2 + 2 equals 4.", content="4")
     # Sanity check: the SDK object does not expose reasoning_content
     assert not hasattr(chunk.choices[0].message, "reasoning_content")
 
     await backend.processing(mot, chunk)
 
-    assert mot._thinking == "2 + 2 equals 4."
+    assert mot.thinking == "2 + 2 equals 4."
     assert mot._underlying_value == "4"
 
 
@@ -212,12 +212,12 @@ async def test_processing_vllm_reasoning_with_null_content(backend):
 
     await backend.processing(mot, chunk)
 
-    assert mot._thinking == "some thinking"
+    assert mot.thinking == "some thinking"
     assert mot._underlying_value == ""
 
 
 async def test_processing_streaming_captures_vllm_reasoning_field(backend):
-    """Streaming: per-chunk ``reasoning`` deltas accumulate into mot._thinking."""
+    """Streaming: per-chunk ``reasoning`` deltas accumulate into mot.thinking."""
     mot: ModelOutputThunk = ModelOutputThunk(value=None)
     chunk_a = ChatCompletionChunk.model_validate(
         {
@@ -257,7 +257,7 @@ async def test_processing_streaming_captures_vllm_reasoning_field(backend):
     await backend.processing(mot, chunk_a)
     await backend.processing(mot, chunk_b)
 
-    assert mot._thinking == "first second"
+    assert mot.thinking == "first second"
     assert mot._underlying_value == "ans"
 
 
@@ -287,7 +287,7 @@ async def test_processing_reasoning_content_still_used(backend):
     mot: ModelOutputThunk = ModelOutputThunk(value=None)
     await backend.processing(mot, chunk)
 
-    assert mot._thinking == "attribute-style trace"
+    assert mot.thinking == "attribute-style trace"
     assert mot._underlying_value == "answer"
 
 
@@ -311,7 +311,7 @@ async def test_processing_reasoning_content_takes_precedence_over_reasoning(back
     mot: ModelOutputThunk = ModelOutputThunk(value=None)
     await backend.processing(mot, chunk)
 
-    assert mot._thinking == "attr-trace"
+    assert mot.thinking == "attr-trace"
     assert mot._underlying_value == "answer"