From 78b71b6c57bfe31ae3d1b5a8d52c95a8c21305ac Mon Sep 17 00:00:00 2001
From: goddammit1 <164378662+goddammit1@users.noreply.github.com>
Date: Fri, 19 Jun 2026 01:20:10 +0300
Subject: [PATCH 1/2] Route all system/developer messages into system
 instruction

Previously, system/developer messages that appeared after the conversation had started were converted into standalone user turns. With large requests, some clients inject big instruction blocks after the user's latest message, so that injected text became the final turn (or was appended to the user's message). As a result, the model could respond to the injected instruction instead of the actual latest user message.

Now every system/developer message is merged into the system instruction regardless of position, so an injected instruction can no longer displace the genuine last user message as the final turn. The parser tests are updated accordingly.
---
 lib/proxy/openai/openai_request_parser.dart | 26 +++++++++++++--------
 test/openai_request_parser_test.dart        | 26 +++++++++++++--------
 2 files changed, 32 insertions(+), 20 deletions(-)
diff --git a/lib/proxy/openai/openai_request_parser.dart b/lib/proxy/openai/openai_request_parser.dart
index b8439ac..ae36b95 100644
--- a/lib/proxy/openai/openai_request_parser.dart
+++ b/lib/proxy/openai/openai_request_parser.dart
@@ -146,12 +146,11 @@ class OpenAiRequestParser {
       throw const FormatException('`messages` must be an array.');
     }
 
-    final leadingSystemParts = <String>[];
+    final systemParts = <String>[];
     final turns = <UnifiedTurn>[];
     final toolDeclarations = _parseTools(json['tools']);
     final toolCallNames = <String, String>{};
     final shouldIgnoreReasoningPrefill = _isGeminiModel(model);
-    var seenNonSystemMessage = false;
 
     for (final rawMessage in messages) {
       if (rawMessage is! Map) {
@@ -164,16 +163,23 @@ class OpenAiRequestParser {
         if (text.isEmpty) {
           continue;
         }
-        if (!seenNonSystemMessage) {
-          leadingSystemParts.add(text);
-        } else {
-          turns.add(UnifiedTurn(role: 'user', parts: [UnifiedPart.text(text)]));
-        }
+        // System/developer messages can appear anywhere in the request. Clients
+        // like SillyTavern frequently inject large instruction blocks (thinking
+        // protocols, jailbreaks, "post-history instructions") *after* the real
+        // history and even after the user's latest message.
+        //
+        // These must NOT become a trailing user turn, nor be appended to the
+        // last user turn: doing so buries the genuine (often short) user
+        // question under a huge instruction block, and the model ends up
+        // responding to the instruction instead of the actual latest message.
+        //
+        // Instead, route every system/developer message into the system
+        // instruction. This keeps the real last user message as the final turn
+        // while still delivering the instructions where models expect them.
+        systemParts.add(text);
         continue;
       }
 
-      seenNonSystemMessage = true;
-
       if (role == 'assistant' &&
           shouldIgnoreReasoningPrefill &&
           _isStandaloneReasoningPrefill(message['content']) &&
@@ -246,7 +252,7 @@ class OpenAiRequestParser {
     final jsonSchema = _readMapValue(responseFormat?['json_schema'], 'response_format.json_schema');
     final googleWebSearchEnabled = _parseGoogleWebSearchEnabled(json);
     final kiroServerToolsEnabled = _parseKiroServerToolsEnabled(json);
-    final mergedSystemInstruction = leadingSystemParts.join('\n\n').trim();
+    final mergedSystemInstruction = systemParts.join('\n\n').trim();
     var systemInstruction = mergedSystemInstruction.isEmpty ? null : mergedSystemInstruction;
     if (turns.isEmpty && systemInstruction != null) {
       turns.add(UnifiedTurn(role: 'user', parts: [UnifiedPart.text(systemInstruction)]));
diff --git a/test/openai_request_parser_test.dart b/test/openai_request_parser_test.dart
index 115b381..e347c95 100644
--- a/test/openai_request_parser_test.dart
+++ b/test/openai_request_parser_test.dart
@@ -254,12 +254,14 @@ void main() {
       ],
     }, requestId: 'req_late_system');
 
-    expect(request.systemInstruction, 'Lead instruction.');
-    expect(request.turns, hasLength(3));
+    // Every system/developer message (leading or mid-conversation) is routed
+    // into the system instruction. This keeps the real conversation turns
+    // intact and prevents an injected instruction from becoming a trailing turn
+    // that the model would answer instead of the actual latest message.
+    expect(request.systemInstruction, 'Lead instruction.\n\nLate policy update.');
+    expect(request.turns, hasLength(2));
     expect(request.turns.first.role, 'user');
     expect(request.turns.first.parts.single.text, 'Hello');
-    expect(request.turns[1].role, 'user');
-    expect(request.turns[1].parts.single.text, 'Late policy update.');
     expect(request.turns.last.role, 'assistant');
     expect(request.turns.last.parts.single.text, 'Hi there');
   });
@@ -275,12 +277,16 @@ void main() {
       ],
     }, requestId: 'req_trailing_system_after_assistant');
 
-    expect(request.systemInstruction, 'You are a roleplay director.');
-    expect(request.turns, hasLength(2));
-    expect(request.turns[0].role, 'assistant');
-    expect(request.turns[0].parts.single.text, 'Previous character reply.');
-    expect(request.turns[1].role, 'user');
-    expect(request.turns[1].parts.single.text, 'Pause roleplay and write the memory book.');
+    // The standalone "<think>" prefill is dropped for Gemini, and the trailing
+    // system message is merged into the system instruction rather than
+    // becoming a user turn. Only the genuine assistant reply remains as a turn.
+    expect(
+      request.systemInstruction,
+      'You are a roleplay director.\n\nPause roleplay and write the memory book.',
+    );
+    expect(request.turns, hasLength(1));
+    expect(request.turns.single.role, 'assistant');
+    expect(request.turns.single.parts.single.text, 'Previous character reply.');
   });
 
   test('preserves standalone reasoning prefills for non-Gemini models', () {

From 212126d5a6bc76588a15c012dbe20fe948304254 Mon Sep 17 00:00:00 2001
From: goddammit1 <164378662+goddammit1@users.noreply.github.com>
Date: Fri, 19 Jun 2026 01:28:33 +0300
Subject: [PATCH 2/2] Rename test to reflect new system message routing
 behavior

---
 test/openai_request_parser_test.dart | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/openai_request_parser_test.dart b/test/openai_request_parser_test.dart
index e347c95..68ab7c4 100644
--- a/test/openai_request_parser_test.dart
+++ b/test/openai_request_parser_test.dart
@@ -243,7 +243,7 @@ void main() {
     expect(functionCall.thoughtSignature, 'sig_read_file');
   });
 
-  test('keeps only leading system and developer notes in system instruction', () {
+  test('routes all system and developer messages into system instruction', () {
     final request = OpenAiRequestParser.parseChatRequest({
       'model': 'gemini-2.5-pro',
       'messages': [