Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/main/java/dev/talos/cli/modes/ExecutionOutcome.java
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,9 @@ static ExecutionOutcome fromToolLoop(
boolean partialMutation = !Objects.equals(current, shaped);
current = shaped;

current = MutationFailureAnswerRenderer.discloseActionObligationBlockedAfterMutationIfNeeded(
current, loopResult, extraMutationSuccesses);

boolean falseMutationClaim = false;
if (!invalidMutation) {
shaped = MutationFailureAnswerRenderer.annotateIfFalseMutationClaim(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import dev.talos.tools.ToolError;

import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
Expand Down Expand Up @@ -150,6 +151,31 @@ public static String summarizePartialMutationOutcomesIfNeeded(
return out.toString().stripTrailing();
}

/**
 * Prepends a disclosure of already-changed targets to an action-obligation failure answer.
 *
 * <p>The answer is returned unchanged when any of the following holds:
 * <ul>
 *   <li>it is {@code null} or blank, or does not start with
 *       {@code "[Action obligation failed:"};</li>
 *   <li>{@code loopResult} is {@code null};</li>
 *   <li>the loop's mutating-success count plus the non-negative part of
 *       {@code extraMutationSuccesses} is not positive;</li>
 *   <li>no successful mutating tool outcome in {@code loopResult} yields a target label
 *       (this applies even when {@code extraMutationSuccesses} is positive);</li>
 *   <li>the answer already contains the {@code "Changed target(s) before the block:"} line.</li>
 * </ul>
 *
 * <p>Otherwise the result is a truth-check header, the comma-separated list of changed
 * targets, and the original answer with its "no file was changed" clauses removed, with
 * trailing whitespace stripped.
 *
 * @param answer the candidate final answer; may be {@code null}
 * @param loopResult the tool-loop result whose outcomes supply the changed targets; may be
 *     {@code null}
 * @param extraMutationSuccesses additional mutation successes to count; negative values are
 *     treated as zero
 * @return the answer with the disclosure prepended, or {@code answer} itself when no
 *     disclosure applies
 */
public static String discloseActionObligationBlockedAfterMutationIfNeeded(
        String answer,
        ToolCallLoop.LoopResult loopResult,
        int extraMutationSuccesses
) {
    boolean isObligationFailure = answer != null
            && !answer.isBlank()
            && answer.startsWith("[Action obligation failed:");
    if (!isObligationFailure || loopResult == null) {
        return answer;
    }

    boolean anyMutationSucceeded =
            loopResult.mutatingToolSuccesses() + Math.max(0, extraMutationSuccesses) > 0;
    if (!anyMutationSucceeded) {
        return answer;
    }

    List<String> mutatedTargets = successfulMutatingTargets(loopResult);
    if (mutatedTargets.isEmpty()
            || answer.contains("Changed target(s) before the block:")) {
        // Nothing to name, or the disclosure was already added on an earlier pass.
        return answer;
    }

    String disclosed =
            "[Truth check: Talos applied mutation(s) before this action-obligation block.]\n\n"
                    + "Changed target(s) before the block: "
                    + String.join(", ", mutatedTargets)
                    + ".\n\n"
                    + removeNoMutationAppliedClauses(answer);
    return disclosed.stripTrailing();
}

public static String summarizeDeniedMutationOutcomesIfNeeded(
String answer,
CurrentTurnPlan plan,
Expand Down Expand Up @@ -330,6 +356,26 @@ private static String trimFailureMessage(String errorMessage) {
return msg;
}

/**
 * Collects one display label per successful mutating tool outcome, in first-seen order and
 * without duplicates.
 *
 * <p>The label is the outcome's path hint, stripped and with backslashes converted to forward
 * slashes. When the path hint is {@code null} or blank, the tool name is used instead.
 * Outcomes that end up with no usable label (both values {@code null} or blank) are skipped.
 *
 * @param loopResult the tool-loop result to inspect; may be {@code null}
 * @return an unmodifiable list of labels; empty when {@code loopResult} or its outcome list is
 *     {@code null}, or when no outcome qualifies
 */
private static List<String> successfulMutatingTargets(ToolCallLoop.LoopResult loopResult) {
    if (loopResult == null || loopResult.toolOutcomes() == null) return List.of();
    LinkedHashSet<String> targets = new LinkedHashSet<>();
    for (ToolCallLoop.ToolOutcome outcome : loopResult.toolOutcomes()) {
        if (outcome == null || !outcome.mutating() || !outcome.success()) continue;
        String target = outcome.pathHint() == null ? "" : outcome.pathHint().strip().replace('\\', '/');
        if (target.isBlank()) {
            // Fall back to the tool name, treating a null name like a blank one so the
            // isBlank() check below cannot throw NullPointerException.
            String toolName = outcome.toolName();
            target = toolName == null ? "" : toolName;
        }
        if (!target.isBlank()) targets.add(target);
    }
    return List.copyOf(targets);
}

/**
 * Removes the known "no file was changed" sentences from an answer.
 *
 * <p>Each of the three literal clauses is deleted wherever it occurs. Afterwards, trailing
 * spaces and tabs are removed from every line and the whole text is stripped of leading and
 * trailing whitespace.
 *
 * @param answer the answer text to clean; must not be {@code null}
 * @return the cleaned text
 */
private static String removeNoMutationAppliedClauses(String answer) {
    List<String> staleClauses = List.of(
            "No approval was requested and no additional file was changed.",
            "No approval was requested and no file was changed.",
            "No approval was requested and no additional file change was made.");

    String result = answer;
    for (String clause : staleClauses) {
        result = result.replace(clause, "");
    }

    // Removing a clause can leave spaces or tabs dangling at the end of its line.
    String withoutTrailingBlanks = result.replaceAll("(?m)[ \\t]+$", "");
    return withoutTrailingBlanks.strip();
}

private static boolean planRequestsMutation(CurrentTurnPlan plan, List<ChatMessage> messages) {
CurrentTurnPlan safePlan = safePlanFromMessages(plan, messages);
TaskContract contract = safePlan.taskContract();
Expand Down
109 changes: 108 additions & 1 deletion src/test/java/dev/talos/cli/modes/ExecutionOutcomeTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -682,7 +682,14 @@ Remaining target(s): script.js.
assertEquals(TaskCompletionStatus.BLOCKED_BY_POLICY, outcome.taskOutcome().completionStatus());
assertEquals(ExecutionOutcome.VerificationStatus.NOT_RUN, outcome.verificationStatus());
assertTrue(outcome.taskOutcome().hasWarning(TruthWarningType.FAILED_ACTION_OBLIGATION));
assertTrue(outcome.finalAnswer().startsWith("[Action obligation failed:"), outcome.finalAnswer());
assertTrue(outcome.finalAnswer().startsWith(
"[Truth check: Talos applied mutation(s) before this action-obligation block.]"),
outcome.finalAnswer());
assertTrue(outcome.finalAnswer().contains(
"Changed target(s) before the block: index.html, styles.css, scripts.js."),
outcome.finalAnswer());
assertTrue(outcome.finalAnswer().contains("[Action obligation failed:"),
outcome.finalAnswer());
assertFalse(outcome.finalAnswer().contains("Static verification: passed"), outcome.finalAnswer());
assertNotNull(trace);
assertNotNull(trace.outcome());
Expand All @@ -700,6 +707,106 @@ Remaining target(s): script.js.
}
}

// The loop result carries one talos.write_file outcome for styles.css together with an
// action-obligation failure answer. The final answer must name styles.css as a changed
// target and must no longer carry the "no file was changed" wording.
@Test
void blockedActionObligationAfterSuccessfulMutationDisclosesChangedTarget() {
    ArrayList<ChatMessage> conversation = new ArrayList<>();
    conversation.add(ChatMessage.system("sys"));
    conversation.add(ChatMessage.user(
            "Rewrite styles.css so index.html still works. Do not edit scripts.js."));

    String blockedAnswer = """
            [Action obligation failed: expected-target progress was not satisfied.]

            Remaining target(s): scripts.js.
            The model attempted talos.write_file(styles.css) instead.
            No approval was requested and no additional file was changed.
            """;

    var stopDecision = FailureDecision.stop(
            FailureAction.ASK_USER,
            "Pending action obligation EXPECTED_TARGETS_REMAINING was ignored after a progress reprompt.");
    var stylesWrite = new ToolCallLoop.ToolOutcome(
            "talos.write_file",
            "styles.css",
            true,
            true,
            false,
            "wrote styles.css",
            "",
            dev.talos.tools.VerificationStatus.PASS);

    ToolCallLoop.LoopResult result = new ToolCallLoop.LoopResult(
            blockedAnswer,
            2,
            1,
            List.of("talos.write_file"),
            List.of(),
            0,
            0,
            false,
            1,
            List.of(),
            0,
            0,
            0,
            0,
            stopDecision,
            List.of(stylesWrite));

    ExecutionOutcome actual = ExecutionOutcome.fromToolLoop(
            result.finalAnswer(), conversation, result, null, 0);
    String finalAnswer = actual.finalAnswer();

    // The turn is still reported as blocked.
    assertEquals(ExecutionOutcome.CompletionStatus.BLOCKED, actual.completionStatus());
    assertEquals(TaskCompletionStatus.BLOCKED_BY_POLICY, actual.taskOutcome().completionStatus());
    assertTrue(actual.taskOutcome().hasWarning(TruthWarningType.FAILED_ACTION_OBLIGATION));

    // The changed target is disclosed and the stale clause is gone.
    assertTrue(
            finalAnswer.contains("Changed target(s) before the block: styles.css."),
            finalAnswer);
    assertFalse(finalAnswer.contains("No approval was requested"), finalAnswer);
    assertFalse(finalAnswer.contains("no additional file was changed"), finalAnswer);
}

// The loop result has an empty tool-outcome list, so the action-obligation failure answer
// must keep its original "no additional file was changed" wording.
@Test
void preMutationActionObligationBlockKeepsNoFileChangedWording() {
    ArrayList<ChatMessage> conversation = new ArrayList<>();
    conversation.add(ChatMessage.system("sys"));
    conversation.add(ChatMessage.user("Edit styles.css."));

    String blockedAnswer = """
            [Action obligation failed: expected-target progress was not satisfied.]

            Remaining target(s): styles.css.
            The model returned prose instead of the required write/edit tool call.
            No approval was requested and no additional file was changed.
            """;

    var stopDecision = FailureDecision.stop(
            FailureAction.ASK_USER,
            "Pending action obligation EXPECTED_TARGETS_REMAINING was ignored after a progress reprompt.");

    ToolCallLoop.LoopResult result = new ToolCallLoop.LoopResult(
            blockedAnswer,
            1,
            0,
            List.of(),
            List.of(),
            0,
            0,
            false,
            0,
            List.of(),
            0,
            0,
            0,
            0,
            stopDecision,
            List.of());

    ExecutionOutcome actual = ExecutionOutcome.fromToolLoop(
            result.finalAnswer(), conversation, result, null, 0);
    String finalAnswer = actual.finalAnswer();

    assertEquals(ExecutionOutcome.CompletionStatus.BLOCKED, actual.completionStatus());
    assertEquals(TaskCompletionStatus.BLOCKED_BY_POLICY, actual.taskOutcome().completionStatus());

    // The original wording survives untouched.
    assertTrue(finalAnswer.contains("No approval was requested"), finalAnswer);
    assertTrue(finalAnswer.contains("no additional file was changed"), finalAnswer);
}

@Test
void embeddedStaticVerificationFailureInBlockedToolLoopIsRecordedInOutcomeAndTrace() throws Exception {
Path ws = Files.createTempDirectory("talos-embedded-static-failure-");
Expand Down