diff --git a/lib/proxy/openai/openai_request_parser.dart b/lib/proxy/openai/openai_request_parser.dart index b8439ac..ae36b95 100644 --- a/lib/proxy/openai/openai_request_parser.dart +++ b/lib/proxy/openai/openai_request_parser.dart @@ -146,12 +146,11 @@ class OpenAiRequestParser { throw const FormatException('`messages` must be an array.'); } - final leadingSystemParts = []; + final systemParts = []; final turns = []; final toolDeclarations = _parseTools(json['tools']); final toolCallNames = {}; final shouldIgnoreReasoningPrefill = _isGeminiModel(model); - var seenNonSystemMessage = false; for (final rawMessage in messages) { if (rawMessage is! Map) { @@ -164,16 +163,23 @@ class OpenAiRequestParser { if (text.isEmpty) { continue; } - if (!seenNonSystemMessage) { - leadingSystemParts.add(text); - } else { - turns.add(UnifiedTurn(role: 'user', parts: [UnifiedPart.text(text)])); - } + // System/developer messages can appear anywhere in the request. Clients + // like SillyTavern frequently inject large instruction blocks (thinking + // protocols, jailbreaks, "post-history instructions") *after* the real + // history and even after the user's latest message. + // + // These must NOT become a trailing user turn, nor be appended to the + // last user turn: doing so buries the genuine (often short) user + // question under a huge instruction block, and the model ends up + // responding to the instruction instead of the actual latest message. + // + // Instead, route every system/developer message into the system + // instruction. This keeps the real last user message as the final turn + // while still delivering the instructions where models expect them. 
+ systemParts.add(text); continue; } - seenNonSystemMessage = true; - if (role == 'assistant' && shouldIgnoreReasoningPrefill && _isStandaloneReasoningPrefill(message['content']) && @@ -246,7 +252,7 @@ class OpenAiRequestParser { final jsonSchema = _readMapValue(responseFormat?['json_schema'], 'response_format.json_schema'); final googleWebSearchEnabled = _parseGoogleWebSearchEnabled(json); final kiroServerToolsEnabled = _parseKiroServerToolsEnabled(json); - final mergedSystemInstruction = leadingSystemParts.join('\n\n').trim(); + final mergedSystemInstruction = systemParts.join('\n\n').trim(); var systemInstruction = mergedSystemInstruction.isEmpty ? null : mergedSystemInstruction; if (turns.isEmpty && systemInstruction != null) { turns.add(UnifiedTurn(role: 'user', parts: [UnifiedPart.text(systemInstruction)])); diff --git a/test/openai_request_parser_test.dart b/test/openai_request_parser_test.dart index 115b381..68ab7c4 100644 --- a/test/openai_request_parser_test.dart +++ b/test/openai_request_parser_test.dart @@ -243,7 +243,7 @@ void main() { expect(functionCall.thoughtSignature, 'sig_read_file'); }); - test('keeps only leading system and developer notes in system instruction', () { + test('routes all system and developer messages into system instruction', () { final request = OpenAiRequestParser.parseChatRequest({ 'model': 'gemini-2.5-pro', 'messages': [ @@ -254,12 +254,14 @@ void main() { ], }, requestId: 'req_late_system'); - expect(request.systemInstruction, 'Lead instruction.'); - expect(request.turns, hasLength(3)); + // Every system/developer message (leading or mid-conversation) is routed + // into the system instruction. This keeps the real conversation turns + // intact and prevents an injected instruction from becoming a trailing turn + // that the model would answer instead of the actual latest message. 
+ expect(request.systemInstruction, 'Lead instruction.\n\nLate policy update.'); + expect(request.turns, hasLength(2)); expect(request.turns.first.role, 'user'); expect(request.turns.first.parts.single.text, 'Hello'); - expect(request.turns[1].role, 'user'); - expect(request.turns[1].parts.single.text, 'Late policy update.'); expect(request.turns.last.role, 'assistant'); expect(request.turns.last.parts.single.text, 'Hi there'); }); @@ -275,12 +277,16 @@ void main() { ], }, requestId: 'req_trailing_system_after_assistant'); - expect(request.systemInstruction, 'You are a roleplay director.'); - expect(request.turns, hasLength(2)); - expect(request.turns[0].role, 'assistant'); - expect(request.turns[0].parts.single.text, 'Previous character reply.'); - expect(request.turns[1].role, 'user'); - expect(request.turns[1].parts.single.text, 'Pause roleplay and write the memory book.'); + // The standalone reasoning prefill is dropped for Gemini, and the trailing + // system message is merged into the system instruction rather than + // becoming a user turn. Only the genuine assistant reply remains as a turn. + expect( + request.systemInstruction, + 'You are a roleplay director.\n\nPause roleplay and write the memory book.', + ); + expect(request.turns, hasLength(1)); + expect(request.turns.single.role, 'assistant'); + expect(request.turns.single.parts.single.text, 'Previous character reply.'); }); test('preserves standalone reasoning prefills for non-Gemini models', () {