Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 3 additions & 26 deletions src/forge/clients/llamafile.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,39 +22,16 @@
from forge.core.workflow import LLMResponse, TextResponse, ToolCall, ToolSpec
from forge.errors import BackendError, ContextDiscoveryError
from forge.prompts.templates import build_tool_prompt, extract_tool_call

# Delimiters that reasoning models wrap around their inline thinking:
#   - Mistral (Ministral Reasoning): [THINK]...[/THINK]
#   - Qwen3, DeepSeek:               <think>...</think>
# Add an alternative to this pattern when supporting another model family.
# Should a model library/registry be introduced later, these patterns belong
# in per-model profiles rather than being hard-coded in this module.
_THINK_TAG_RE = re.compile(
    r"\[THINK\](.*?)\[/THINK\]|<think>(.*?)</think>",
    flags=re.DOTALL,
)
# Re-exported under the historical private name so existing imports
# (`from forge.clients.llamafile import _extract_think_tags`) keep working.
from forge.prompts.think_tags import extract_think_tags as _extract_think_tags

# A GGUF model split across several files is named
# "<stem>-00001-of-00003.gguf". That shard index reflects how the files are
# laid out on disk rather than which model it is, so it is removed from the
# stem before the stem is used as the sampling-defaults registry key.
_SHARD_SUFFIX_RE = re.compile(
    r"-\d{5}-of-\d{5}$",
)


def _extract_think_tags(text: str) -> tuple[str, str]:
    """Extract thinking blocks from text.

    Supports [THINK]...[/THINK] (Mistral) and <think>...</think>
    (Qwen/DeepSeek).

    Returns (reasoning, remaining_content). ``reasoning`` is the stripped
    bodies of every non-empty thinking block, joined by blank lines.
    ``remaining_content`` is ``text`` with all thinking blocks removed and
    the result stripped; when no block is present it is ``text`` unchanged.
    """
    matches = list(_THINK_TAG_RE.finditer(text))
    if not matches:
        # No tags at all: hand the text back untouched (not even stripped).
        return "", text

    # group(1) is the [THINK] body, group(2) is the <think> body.
    bodies = ((m.group(1) or m.group(2) or "").strip() for m in matches)
    # Empty or whitespace-only blocks are still removed from the content
    # below, but must not add stray blank-line separators to the reasoning.
    reasoning = "\n\n".join(body for body in bodies if body)
    return reasoning, _THINK_TAG_RE.sub("", text).strip()


def _merge_consecutive(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
"""Ensure strict user/assistant alternation for Jinja parity checker.

Expand Down
7 changes: 1 addition & 6 deletions src/forge/prompts/templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import re

from forge.core.workflow import ToolCall, ToolSpec
from forge.prompts.think_tags import THINK_TAG_RE as _THINK_TAG_RE


def build_tool_prompt(tools: list[ToolSpec]) -> str:
Expand Down Expand Up @@ -121,12 +122,6 @@ def _try_parse_tool_call(json_str: str, available_tools: list[str]) -> ToolCall
r"(\w+)\[ARGS\](\{.*\})", re.DOTALL
)

# Matches whole thinking blocks so they can be stripped before rescue
# parsing. Covers the same delimiters as llamafile._THINK_TAG_RE.
_THINK_TAG_RE = re.compile(
    r"\[THINK\].*?\[/THINK\]|<think>.*?</think>",
    flags=re.DOTALL,
)

# Qwen Coder XML tool call format.
# <function=name>
# <parameter=key>value</parameter>
Expand Down
46 changes: 46 additions & 0 deletions src/forge/prompts/think_tags.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""Thinking/reasoning tag parsing shared across client adapters.

Reasoning models wrap their chain-of-thought in delimiter tags. When the
backend's reasoning parser is absent — or doesn't split a given model's output
into a dedicated field — that thinking arrives inline in the message
``content`` instead. This module is the single source of truth for detecting
and extracting those blocks, used by the client adapters (to populate
``ToolCall.reasoning`` and to clean ``TextResponse`` content) and by the
prompt-rescue path in ``templates`` (to strip thinking before parsing a
rehearsed tool call).

Supported delimiters:
- ``[THINK]...[/THINK]`` — Mistral (Ministral Reasoning)
- ``<think>...</think>`` — Qwen3, DeepSeek

Extend ``THINK_TAG_RE`` when adding a new model family. If a model
library/registry is added later, move these patterns into per-model profiles
instead of hard-coding here.
"""

from __future__ import annotations

import re

THINK_TAG_RE = re.compile(
    r"\[THINK\](.*?)\[/THINK\]|<think>(.*?)</think>", re.DOTALL
)


def extract_think_tags(text: str) -> tuple[str, str]:
    """Split thinking blocks out of ``text``.

    Returns ``(reasoning, remaining_content)``: the stripped bodies of the
    thinking blocks (joined by blank lines) and the text with those blocks
    removed and stripped. Blocks whose body is empty or whitespace-only are
    removed from the content but contribute nothing to ``reasoning``, so no
    stray blank-line separators appear in it. When no tags are present,
    ``reasoning`` is the empty string and ``remaining_content`` is the
    original text unchanged.
    """
    matches = list(THINK_TAG_RE.finditer(text))
    if not matches:
        # No tags at all: hand the text back untouched (not even stripped).
        return "", text

    # group(1) is the [THINK] body, group(2) is the <think> body.
    bodies = ((m.group(1) or m.group(2) or "").strip() for m in matches)
    reasoning = "\n\n".join(body for body in bodies if body)
    return reasoning, THINK_TAG_RE.sub("", text).strip()
Loading