antoinezambelli · antoinezambelli · Jun 20, 2026 · Jun 20, 2026
diff --git a/src/forge/clients/llamafile.py b/src/forge/clients/llamafile.py
@@ -22,39 +22,16 @@
 from forge.core.workflow import LLMResponse, TextResponse, ToolCall, ToolSpec
 from forge.errors import BackendError, ContextDiscoveryError
 from forge.prompts.templates import build_tool_prompt, extract_tool_call
-
-# Model-specific thinking tag formats. Extend this list when adding new model
-# families. If a model library/registry is added later, move these patterns
-# into per-model profiles instead of hard-coding here.
-#   - [THINK]...[/THINK]  — Mistral (Ministral Reasoning)
-#   - <think>...</think>   — Qwen3, DeepSeek
-_THINK_TAG_RE = re.compile(
-    r"\[THINK\](.*?)\[/THINK\]|<think>(.*?)</think>", re.DOTALL
-)
+# Re-exported under the historical private name so existing imports
+# (`from forge.clients.llamafile import _extract_think_tags`) keep working.
+from forge.prompts.think_tags import extract_think_tags as _extract_think_tags
 
 # Multi-shard GGUF naming convention: "<stem>-00001-of-00003.gguf". The shard
 # index is filesystem layout, not model identity, so strip it for the
 # sampling-defaults registry key.
 _SHARD_SUFFIX_RE = re.compile(r"-\d{5}-of-\d{5}$")
 
 
-def _extract_think_tags(text: str) -> tuple[str, str]:
-    """Extract thinking blocks from text.
-
-    Supports [THINK]...[/THINK] (Mistral) and <think>...</think> (Qwen/DeepSeek).
-    Returns (reasoning, remaining_content).
-    """
-    reasoning_parts: list[str] = []
-    remaining = text
-    for m in _THINK_TAG_RE.finditer(text):
-        # group(1) is [THINK] match, group(2) is <think> match
-        content = (m.group(1) or m.group(2) or "").strip()
-        reasoning_parts.append(content)
-    if reasoning_parts:
-        remaining = _THINK_TAG_RE.sub("", text).strip()
-    return "\n\n".join(reasoning_parts), remaining
-
-
 def _merge_consecutive(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
     """Ensure strict user/assistant alternation for Jinja parity checker.
 

diff --git a/src/forge/prompts/templates.py b/src/forge/prompts/templates.py
@@ -6,6 +6,7 @@
 import re
 
 from forge.core.workflow import ToolCall, ToolSpec
+from forge.prompts.think_tags import THINK_TAG_RE as _THINK_TAG_RE
 
 
 def build_tool_prompt(tools: list[ToolSpec]) -> str:
@@ -121,12 +122,6 @@ def _try_parse_tool_call(json_str: str, available_tools: list[str]) -> ToolCall
     r"(\w+)\[ARGS\](\{.*\})", re.DOTALL
 )
 
-# Think tag patterns (same as llamafile._THINK_TAG_RE) — needed to strip
-# thinking blocks before rescue parsing.
-_THINK_TAG_RE = re.compile(
-    r"\[THINK\].*?\[/THINK\]|<think>.*?</think>", re.DOTALL
-)
-
 # Qwen Coder XML tool call format.
 # <function=name>
 #   <parameter=key>value</parameter>

diff --git a/src/forge/prompts/think_tags.py b/src/forge/prompts/think_tags.py
@@ -0,0 +1,46 @@
+"""Thinking/reasoning tag parsing shared across client adapters.
+
+Reasoning models wrap their chain-of-thought in delimiter tags. When the
+backend's reasoning parser is absent — or doesn't split a given model's output
+into a dedicated field — that thinking arrives inline in the message
+``content`` instead. This module is the single source of truth for detecting
+and extracting those blocks, used by the client adapters (to populate
+``ToolCall.reasoning`` and to clean ``TextResponse`` content) and by the
+prompt-rescue path in ``templates`` (to strip thinking before parsing a
+rehearsed tool call).
+
+Supported delimiters:
+  - ``[THINK]...[/THINK]``  — Mistral (Ministral Reasoning)
+  - ``<think>...</think>``  — Qwen3, DeepSeek
+
+Extend ``THINK_TAG_RE`` when adding a new model family. If a model
+library/registry is added later, move these patterns into per-model profiles
+instead of hard-coding here.
+"""
+
+from __future__ import annotations
+
+import re
+
+THINK_TAG_RE = re.compile(  # group(1): [THINK] body; group(2): <think> body
+    r"\[THINK\](.*?)\[/THINK\]|<think>(.*?)</think>", re.DOTALL
+)
+
+
+def extract_think_tags(text: str) -> tuple[str, str]:
+    """Split thinking blocks out of ``text``.
+
+    Returns ``(reasoning, remaining_content)``: the non-empty thinking bodies
+    (each stripped, joined by blank lines) and the text with every block
+    removed and stripped. When no tags are present, ``reasoning`` is the empty
+    string and ``remaining_content`` is the original text unchanged.
+    """
+    matches = list(THINK_TAG_RE.finditer(text))
+    if not matches:
+        return "", text
+    # group(1) is the [THINK] body, group(2) is the <think> body. Empty bodies
+    # are dropped so they don't leave stray blank-line separators in the join.
+    bodies = ((m.group(1) or m.group(2) or "").strip() for m in matches)
+    reasoning = "\n\n".join(body for body in bodies if body)
+    remaining = THINK_TAG_RE.sub("", text).strip()
+    return reasoning, remaining