diff --git a/CHANGELOG.md b/CHANGELOG.md
index a153e55e..080d7b76 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 ## [Unreleased]
 
+### Fixed
+
+- **#2083** — Reasoning models (Qwen3-thinking via LM Studio, DeepSeek-R1, Kimi-K2, etc.) no longer trigger a budget-doubling retry on auto-title generation when the model emits hidden reasoning tokens but no visible content (`api/streaming.py:_extract_title_response`, `_title_retry_status`, and the new `_title_should_skip_remaining_attempts`). Pre-fix: a reasoning model that burned its entire 512-token budget on hidden thinking returned `finish_reason: length` with non-empty `reasoning_content`. `_extract_title_response()` classified that as `llm_length`, which triggered the budget-doubling retry path — and since the next call produced the same empty-reasoning shape, the retry just doubled the GPU/credit burn. Repeated across the two prompts in `_title_prompts()`, that was up to ~3000 reasoning tokens of GPU work per new chat, and on local LM Studio servers (where `is_lmstudio=False` for `custom:` providers means `reasoning_effort: "none"` never reaches the model) it presented as the GPU never going idle after a prompt. Fix: classify any reasoning-bearing empty response as `llm_empty_reasoning` regardless of `finish_reason`, and short-circuit both the within-prompt budget retry AND the cross-prompt iteration on that status. Length-truncated responses WITHOUT reasoning tokens still get the legitimate budget-doubling retry. Empty-reasoning responses instead fall through to `_fallback_title_from_exchange` for a local-summary title. Reported by @darkopetrovic. Companion agent-side classifier work (matching LM Studio via `base_url` fingerprint for `custom:` providers) is tracked separately on the hermes-agent side.
+
 ## [v0.51.46] — 2026-05-11 — Release V (5-PR contributor batch — CSP report-only + logs panel polish + plugin slash commands + turn-journal crash-safe writer + lifecycle events)
 
 ### Added
diff --git a/api/streaming.py b/api/streaming.py
index 2e54193a..aef684ab 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -877,9 +877,31 @@ def _title_retry_completion_budget(provider: str = '', model: str = '', base_url
 
 
 def _title_retry_status(status: str) -> bool:
+    # Whether to grant a second budget attempt within the same prompt+model
+    # combination.  ``llm_length`` indicates the model would have produced
+    # content with more headroom, so doubling the budget can help.
+    #
+    # ``llm_empty_reasoning`` historically also triggered a retry, but for
+    # reasoning models (Qwen3-thinking, DeepSeek-R1, Kimi-K2, etc.) that
+    # status means the model burned its entire budget on hidden reasoning
+    # tokens and emitted nothing visible.  Doubling the budget in that case
+    # just doubles the GPU/credit cost without changing the outcome — the
+    # next attempt produces the same shape.  We skip the retry for empty-
+    # reasoning statuses and let the title path fall through to the local
+    # fallback summary.  See issue #2083 for the LM Studio + Qwen3 repro.
     return status in {
         'llm_length',
         'llm_length_aux',
+    }
+
+
+def _title_should_skip_remaining_attempts(status: str) -> bool:
+    # When a reasoning model burns its budget on hidden reasoning,
+    # additional prompts against the same model will hit the same wall.
+    # Short-circuit the prompt-iteration loop so we don't issue a second
+    # full-budget LLM call (and twice the GPU/credit burn) only to land in
+    # the same fallback path.  See issue #2083.
+    return status in {
         'llm_empty_reasoning',
         'llm_empty_reasoning_aux',
     }
@@ -922,10 +944,16 @@ def _extract_title_response(resp, *, aux: bool = False) -> tuple[str, str]:
             or _safe_text_value(_safe_obj_value(message, 'reasoning_content'))
             or _safe_text_value(_safe_obj_value(message, 'thinking'))
         )
-        if finish_reason == 'length':
-            return '', f'llm_length{suffix}'
+        # When the model emitted reasoning tokens but no visible content, it
+        # burned its budget on hidden thinking — retrying with a larger budget
+        # almost never recovers a useful title (see issue #2083: Qwen3-thinking
+        # via LM Studio loops indefinitely on auto-title generation).  Report
+        # this case distinctly so callers can short-circuit instead of double-
+        # billing the GPU/credit on a near-certain repeat.
         if reasoning:
             return '', f'llm_empty_reasoning{suffix}'
+        if finish_reason == 'length':
+            return '', f'llm_length{suffix}'
         return '', f'llm_empty{suffix}'
     except Exception:
         return '', f'llm_empty{suffix}'
@@ -978,6 +1006,11 @@ def generate_title_raw_via_aux(
             except Exception as e:
                 last_status = 'llm_error_aux'
                 logger.debug("Aux title generation attempt %s failed: %s", idx + 1, e)
+            # If the model just burned its budget on hidden reasoning, retrying
+            # the next prompt against the same model produces the same shape.
+            # Short-circuit to the local fallback path (#2083).
+            if _title_should_skip_remaining_attempts(last_status):
+                break
         return None, last_status
     except Exception as e:
         logger.debug("Aux title generation failed: %s", e)
@@ -1077,6 +1110,11 @@ def generate_title_raw_via_agent(agent, user_text: str, assistant_text: str) ->
                     getattr(agent, 'model', None),
                     e,
                 )
+            # If the model just burned its budget on hidden reasoning, retrying
+            # the next prompt against the same model produces the same shape.
+            # Short-circuit to the local fallback path (#2083).
+            if _title_should_skip_remaining_attempts(last_status):
+                break
         return None, last_status
     except Exception as e:
         logger.debug("Agent title generation failed: %s", e)
diff --git a/tests/test_title_aux_routing.py b/tests/test_title_aux_routing.py
index 3027aef7..373f0747 100644
--- a/tests/test_title_aux_routing.py
+++ b/tests/test_title_aux_routing.py
@@ -133,19 +133,48 @@ class TestReasoningModelTitleGeneration(unittest.TestCase):
         self.assertEqual(_title_completion_budget(), 512)
         self.assertEqual(_title_retry_completion_budget(), 1024)
 
-    def test_aux_retries_empty_reasoning_length_response_with_larger_budget(self):
-        """If a reasoning model returns empty content at finish_reason=length, retry once."""
+    def test_aux_short_circuits_on_empty_reasoning_without_retrying(self):
+        """Regression for #2083: reasoning models that emit only hidden
+        reasoning tokens (no visible content) must NOT trigger a budget-doubling
+        retry — the second call invariably produces the same empty-reasoning
+        shape and just doubles the GPU/credit burn.  Short-circuit to the local
+        fallback path instead."""
         from api.streaming import generate_title_raw_via_aux
 
-        responses = [
-            {
+        call_count = [0]
+
+        def fake_call_llm(**kwargs):
+            call_count[0] += 1
+            return {
                 'choices': [
                     {
                         'message': {'content': '', 'reasoning': 'long hidden reasoning'},
                         'finish_reason': 'length',
                     }
                 ]
-            },
+            }
+
+        with _patch_tg_config({'provider': 'ollama', 'model': 'kimi-k2.6', 'base_url': 'https://ollama.com/v1'}):
+            with patch('agent.auxiliary_client.call_llm', side_effect=fake_call_llm, create=True):
+                result, status = generate_title_raw_via_aux(
+                    user_text='Hey nur ein kurzer Test',
+                    assistant_text='Alles klar, ich helfe dir dabei.',
+                )
+
+        self.assertIsNone(result)
+        self.assertEqual(status, 'llm_empty_reasoning_aux')
+        # Exactly one call in total, at the base budget — no retry on prompt 0,
+        # and no second-prompt attempt either (short-circuited).
+        self.assertEqual(call_count[0], 1)
+
+    def test_aux_still_retries_finish_length_without_reasoning(self):
+        """Length-truncated responses WITHOUT reasoning tokens still get the
+        budget-doubling retry — those are legitimately recoverable by giving
+        the model more headroom."""
+        from api.streaming import generate_title_raw_via_aux
+
+        responses = [
+            {'choices': [{'message': {'content': ''}, 'finish_reason': 'length'}]},
             {'choices': [{'message': {'content': 'Useful Session Title'}, 'finish_reason': 'stop'}]},
         ]
         captured_budgets = []
@@ -187,21 +216,58 @@ class TestReasoningModelTitleGeneration(unittest.TestCase):
                 )
 
         self.assertIsNone(result)
-        self.assertEqual(status, 'llm_length_aux')
+        self.assertEqual(status, 'llm_empty_reasoning_aux')
 
-    def test_agent_route_retries_empty_reasoning_length_response(self):
-        """The active-agent route should get the same reasoning-model retry path as aux."""
+    def test_agent_route_short_circuits_on_empty_reasoning_without_retrying(self):
+        """Regression for #2083 on the active-agent route: empty-reasoning
+        responses must NOT trigger a budget-doubling retry."""
         from api.streaming import generate_title_raw_via_agent
 
-        responses = [
-            {
+        call_count = [0]
+
+        def fake_create(**kwargs):
+            call_count[0] += 1
+            return {
                 'choices': [
                     {
                         'message': {'content': '', 'reasoning': 'long hidden reasoning'},
                         'finish_reason': 'length',
                     }
                 ]
-            },
+            }
+
+        client = types.SimpleNamespace(
+            chat=types.SimpleNamespace(
+                completions=types.SimpleNamespace(create=fake_create)
+            )
+        )
+        agent = MagicMock()
+        agent.api_mode = 'openai'
+        agent.provider = 'ollama'
+        agent.model = 'kimi-k2.6'
+        agent.base_url = 'https://ollama.com/v1'
+        agent.reasoning_config = None
+        agent._build_api_kwargs.return_value = {}
+        agent._ensure_primary_openai_client.return_value = client
+
+        result, status = generate_title_raw_via_agent(
+            agent,
+            user_text='Hey nur ein kurzer Test',
+            assistant_text='Alles klar, ich helfe dir dabei.',
+        )
+
+        self.assertIsNone(result)
+        self.assertEqual(status, 'llm_empty_reasoning')
+        # Exactly one call in total at base budget — no retry, no second-prompt attempt.
+        self.assertEqual(call_count[0], 1)
+        self.assertIsNone(agent.reasoning_config)
+
+    def test_agent_route_still_retries_finish_length_without_reasoning(self):
+        """The active-agent route should preserve retry-on-length-no-reasoning."""
+        from api.streaming import generate_title_raw_via_agent
+
+        responses = [
+            {'choices': [{'message': {'content': ''}, 'finish_reason': 'length'}]},
             {'choices': [{'message': {'content': 'Agent Session Title'}, 'finish_reason': 'stop'}]},
         ]
         captured_budgets = []