Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 16 additions & 10 deletions lib/proxy/openai/openai_request_parser.dart
Original file line number Diff line number Diff line change
Expand Up @@ -146,12 +146,11 @@ class OpenAiRequestParser {
throw const FormatException('`messages` must be an array.');
}

final leadingSystemParts = <String>[];
final systemParts = <String>[];
final turns = <UnifiedTurn>[];
final toolDeclarations = _parseTools(json['tools']);
final toolCallNames = <String, String>{};
final shouldIgnoreReasoningPrefill = _isGeminiModel(model);
var seenNonSystemMessage = false;

for (final rawMessage in messages) {
if (rawMessage is! Map) {
Expand All @@ -164,16 +163,23 @@ class OpenAiRequestParser {
if (text.isEmpty) {
continue;
}
if (!seenNonSystemMessage) {
leadingSystemParts.add(text);
} else {
turns.add(UnifiedTurn(role: 'user', parts: [UnifiedPart.text(text)]));
}
// System/developer messages can appear anywhere in the request. Clients
// like SillyTavern frequently inject large instruction blocks (thinking
// protocols, jailbreaks, "post-history instructions") *after* the real
// history and even after the user's latest message.
//
// These must NOT become a trailing user turn, nor be appended to the
// last user turn: doing so buries the genuine (often short) user
// question under a huge instruction block, and the model ends up
// responding to the instruction instead of the actual latest message.
//
// Instead, route every system/developer message into the system
// instruction. This keeps the real last user message as the final turn
// while still delivering the instructions where models expect them.
systemParts.add(text);
continue;
}

seenNonSystemMessage = true;

if (role == 'assistant' &&
shouldIgnoreReasoningPrefill &&
_isStandaloneReasoningPrefill(message['content']) &&
Expand Down Expand Up @@ -246,7 +252,7 @@ class OpenAiRequestParser {
final jsonSchema = _readMapValue(responseFormat?['json_schema'], 'response_format.json_schema');
final googleWebSearchEnabled = _parseGoogleWebSearchEnabled(json);
final kiroServerToolsEnabled = _parseKiroServerToolsEnabled(json);
final mergedSystemInstruction = leadingSystemParts.join('\n\n').trim();
final mergedSystemInstruction = systemParts.join('\n\n').trim();
var systemInstruction = mergedSystemInstruction.isEmpty ? null : mergedSystemInstruction;
if (turns.isEmpty && systemInstruction != null) {
turns.add(UnifiedTurn(role: 'user', parts: [UnifiedPart.text(systemInstruction)]));
Expand Down
28 changes: 17 additions & 11 deletions test/openai_request_parser_test.dart
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ void main() {
expect(functionCall.thoughtSignature, 'sig_read_file');
});

test('keeps only leading system and developer notes in system instruction', () {
test('routes all system and developer messages into system instruction', () {
final request = OpenAiRequestParser.parseChatRequest({
'model': 'gemini-2.5-pro',
'messages': [
Expand All @@ -254,12 +254,14 @@ void main() {
],
}, requestId: 'req_late_system');

expect(request.systemInstruction, 'Lead instruction.');
expect(request.turns, hasLength(3));
// Every system/developer message (leading or mid-conversation) is routed
// into the system instruction. This keeps the real conversation turns
// intact and prevents an injected instruction from becoming a trailing turn
// that the model would answer instead of the actual latest message.
expect(request.systemInstruction, 'Lead instruction.\n\nLate policy update.');
Comment on lines +257 to +261

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The test name on line 246 (keeps only leading system and developer notes in system instruction) is now outdated and misleading, as the behavior has been changed to route all system and developer messages (including mid-conversation ones) into the system instruction. Please update the test name to reflect this new behavior, for example: routes all system and developer messages into system instruction.

expect(request.turns, hasLength(2));
expect(request.turns.first.role, 'user');
expect(request.turns.first.parts.single.text, 'Hello');
expect(request.turns[1].role, 'user');
expect(request.turns[1].parts.single.text, 'Late policy update.');
expect(request.turns.last.role, 'assistant');
expect(request.turns.last.parts.single.text, 'Hi there');
});
Expand All @@ -275,12 +277,16 @@ void main() {
],
}, requestId: 'req_trailing_system_after_assistant');

expect(request.systemInstruction, 'You are a roleplay director.');
expect(request.turns, hasLength(2));
expect(request.turns[0].role, 'assistant');
expect(request.turns[0].parts.single.text, 'Previous character reply.');
expect(request.turns[1].role, 'user');
expect(request.turns[1].parts.single.text, 'Pause roleplay and write the memory book.');
// The standalone "<think>" prefill is dropped for Gemini, and the trailing
// system instruction is merged into the system instruction rather than
// becoming a user turn. Only the genuine assistant reply remains as a turn.
expect(
request.systemInstruction,
'You are a roleplay director.\n\nPause roleplay and write the memory book.',
);
expect(request.turns, hasLength(1));
expect(request.turns.single.role, 'assistant');
expect(request.turns.single.parts.single.text, 'Previous character reply.');
});

test('preserves standalone reasoning prefills for non-Gemini models', () {
Expand Down