From 42238adab077bd3f39f862f92501bfa3d71bfae8 Mon Sep 17 00:00:00 2001 From: Vincent Koc <25068+vincentkoc@users.noreply.github.com> Date: Thu, 18 Jun 2026 16:19:02 +0800 Subject: [PATCH] fix(core): preserve compound command output --- src/cli/main.ts | 1 + src/core/command-match.ts | 35 ++++++++++++++++ src/core/command.ts | 1 + src/core/compaction-metadata.ts | 2 +- src/core/reduce.ts | 22 +++++++--- test/cli/main.test.ts | 27 +++++++++++++ test/core/command.test.ts | 21 ++++++++++ test/core/reduce.test.ts | 71 ++++++++++++++++++++++++++++----- 8 files changed, 163 insertions(+), 17 deletions(-) diff --git a/src/cli/main.ts b/src/cli/main.ts index 6c2e3420..d2ca6d2c 100644 --- a/src/cli/main.ts +++ b/src/cli/main.ts @@ -723,6 +723,7 @@ export function decorateWrapInlineText(result: WrapResult["result"], raw: boolea "", "---", WRAP_AUTHORITATIVE_FOOTER, + ...(result.rawRef?.id ? [`Raw output is available locally: tokenjuice cat ${result.rawRef.id}`] : []), ].join("\n"); return `${result.inlineText}${footer}`; } diff --git a/src/core/command-match.ts b/src/core/command-match.ts index ae9721d7..72db0475 100644 --- a/src/core/command-match.ts +++ b/src/core/command-match.ts @@ -2,6 +2,7 @@ import type { CommandMatchSource, ToolExecutionInput } from "../types.js"; import { ENV_ASSIGNMENT_PATTERN, + hasSequentialShellCommands, isCompoundShellCommand, splitTopLevelCommandChain, stripLeadingEnvAssignmentsFromCommand, @@ -523,6 +524,40 @@ export function resolveEffectiveCommand(input: Pick): boolean { + const shellBody = unwrapShellRunner(input); + const command = (shellBody ?? input.command ?? "").trim(); + if (!command || !hasSequentialShellCommands(command)) { + return false; + } + + let effectiveCommand = command; + for (let iteration = 0; iteration < 16; iteration += 1) { + const setupIfTail = stripLeadingSetupIfBlock(effectiveCommand); + const setupSegmentTail = setupIfTail ?? stripLeadingSetupSegment(effectiveCommand); + if (!setupSegmentTail) { + break; + } + effectiveCommand = setupSegmentTail; + } + + let substantiveCount = 0; + for (const sequenceSegment of splitTopLevelCommandChain(effectiveCommand)) { + for (const segment of splitTopLevelOrChain(sequenceSegment)) { + const candidate = buildEffectiveCandidate(tokenizeCommand(segment), true, segment); + if (!candidate) { + continue; + } + substantiveCount += 1; + if (substantiveCount > 1) { + return true; + } + } + } + + return false; +} + export function getEffectiveCommandArgv(input: Pick): string[] { return resolveEffectiveCommand(input)?.argv ?? getCandidateArgv(input); } diff --git a/src/core/command.ts b/src/core/command.ts index a3d6730a..8700eeca 100644 --- a/src/core/command.ts +++ b/src/core/command.ts @@ -10,6 +10,7 @@ export type { CommandMatchCandidate } from "./command-match.js"; export { deriveCommandMatchCandidates, getEffectiveCommandArgv, + hasMultipleSubstantiveShellCommands, isSetupWrapperSegment, resolveEffectiveCommand, stripLeadingEnvAssignments, diff --git a/src/core/compaction-metadata.ts b/src/core/compaction-metadata.ts index 73e76b0e..87cc209f 100644 --- a/src/core/compaction-metadata.ts +++ b/src/core/compaction-metadata.ts @@ -23,7 +23,7 @@ export const NO_COMPACTION_METADATA: CompactionMetadata = { kinds: [], }; -export const WRAP_AUTHORITATIVE_FOOTER = "[tokenjuice] This is the complete, authoritative output for this command. It was deterministically compacted to remove low-signal noise; the omitted content is not retrievable. Do not re-run the command, vary flags, or switch tools to try to recover it. Proceed with the task using this output."; +export const WRAP_AUTHORITATIVE_FOOTER = "[tokenjuice] This is the complete, authoritative output for this command. It was deterministically compacted to remove low-signal noise. Do not re-run the command, vary flags, or switch tools to try to recover omitted content; use a raw-artifact recovery command below when one is provided. Proceed with the task using this output."; function buildCompactionMetadata(authoritative: boolean, ...kinds: CompactionKind[]): CompactionMetadata { if (kinds.length === 0) { diff --git a/src/core/reduce.ts b/src/core/reduce.ts index e5e0c9af..4adbb7a3 100644 --- a/src/core/reduce.ts +++ b/src/core/reduce.ts @@ -1,4 +1,5 @@ import { loadRules } from "./rules.js"; +import { hasMultipleSubstantiveShellCommands } from "./command-match.js"; import { classifyExecution, resolveRuleMatch } from "./classify.js"; import { isFileContentInspectionCommand } from "./command-identity.js"; import { normalizeExecutionInput } from "./execution-input.js"; @@ -323,12 +324,21 @@ export async function reduceExecutionWithRules( reducedChars, ratio: measuredRawChars === 0 ? 1 : reducedChars / measuredRawChars, }); - const resolvedMatch = opts.classifier + const multipleSubstantiveCommands = !opts.classifier && hasMultipleSubstantiveShellCommands(input); + const resolvedMatch = opts.classifier || multipleSubstantiveCommands ? undefined : resolveRuleMatch(input, rules); - const classification = resolvedMatch?.classification - ?? classifyExecution(input, rules, opts.classifier); - const reducerInput = resolvedMatch?.candidateInput ?? normalizedInput; + const classification = multipleSubstantiveCommands + ? { + family: "generic", + confidence: 0.2, + matchedReducer: "generic/fallback", + } + : resolvedMatch?.classification + ?? classifyExecution(input, rules, opts.classifier); + const reducerInput = multipleSubstantiveCommands + ? normalizedInput + : resolvedMatch?.candidateInput ?? normalizedInput; const trace = opts.trace ? { ...(normalizedInput.command ? { normalizedCommand: normalizedInput.command } : {}), @@ -390,7 +400,9 @@ export async function reduceExecutionWithRules( }; } - const inspectionSummary = buildInspectionSummary(normalizedInput, rawText, opts.noOmit); + const inspectionSummary = multipleSubstantiveCommands + ? null + : buildInspectionSummary(normalizedInput, rawText, opts.noOmit); if (inspectionSummary) { const summaryText = inspectionSummary.lines.join("\n").trim(); const selectedText = clampTextMiddleWithMetadata(summaryText, maxInlineChars, opts.noOmit); diff --git a/test/cli/main.test.ts b/test/cli/main.test.ts index e6ecb982..580a4fe3 100644 --- a/test/cli/main.test.ts +++ b/test/cli/main.test.ts @@ -51,6 +51,33 @@ describe("decorateWrapInlineText", () => { expect(decorateWrapInlineText(result, false)).toContain(WRAP_AUTHORITATIVE_FOOTER); }); + it("provides the raw-artifact recovery command when output is stored", () => { + const result: CompactResult = { + inlineText: "summary", + compaction: { + authoritative: true, + kinds: ["head-tail-omission"], + }, + rawRef: { + id: "tj_0123456789ab", + path: "/tmp/tokenjuice/raw.txt", + metadataPath: "/tmp/tokenjuice/meta.json", + }, + stats: { + rawChars: 4_000, + reducedChars: 40, + ratio: 0.01, + }, + classification: { + family: "generic", + confidence: 0.9, + matchedReducer: "generic/fallback", + }, + }; + + expect(decorateWrapInlineText(result, false)).toContain("tokenjuice cat tj_0123456789ab"); + }); + it("suppresses the authoritative footer for lossless rewrites", () => { const result: CompactResult = { inlineText: "summary", diff --git a/test/core/command.test.ts b/test/core/command.test.ts index 1ecf48c6..cdeef352 100644 --- a/test/core/command.test.ts +++ b/test/core/command.test.ts @@ -3,6 +3,7 @@ import { describe, expect, it } from "vitest"; import { deriveCommandMatchCandidates, getGitSubcommand, + hasMultipleSubstantiveShellCommands, hasSequentialShellCommands, isFileContentInspectionCommand, isRepositoryInspectionCommand, @@ -371,6 +372,26 @@ describe("hasSequentialShellCommands", () => { }); }); +describe("hasMultipleSubstantiveShellCommands", () => { + it.each([ + "grep -i github /etc/hosts; echo '---dig:'; dig +short api.github.com @1.1.1.1; scutil --dns", + "cd repo && swift test && rg -n failure src", + "command -v rg || cargo install ripgrep; rg --files src", + "bash -lc 'grep -i github /etc/hosts; dig +short api.github.com @1.1.1.1'", + ])("detects `%s` as multiple substantive commands", (command) => { + expect(hasMultipleSubstantiveShellCommands({ command })).toBe(true); + }); + + it.each([ + "cd repo && pnpm test", + "source .env && cargo test", + "if command -v tt >/dev/null 2>&1; then tt title 'tests'; else tmux select-pane -T 'tests' 2>/dev/null || true; fi; pnpm test", + "bash -lc 'cd repo && pnpm test'", + ])("keeps setup-wrapped `%s` as one substantive command", (command) => { + expect(hasMultipleSubstantiveShellCommands({ command })).toBe(false); + }); +}); + describe("getGitSubcommand", () => { it.each([ { command: "git ls-files src", subcommand: "ls-files" }, diff --git a/test/core/reduce.test.ts b/test/core/reduce.test.ts index d619c9c6..c38d2fef 100644 --- a/test/core/reduce.test.ts +++ b/test/core/reduce.test.ts @@ -710,7 +710,7 @@ describe("reduceExecution", () => { } }); - it("matches wrapped rg search commands and prefers the first substantive command in a chain", async () => { + it("matches wrapped rg search commands after setup commands", async () => { const searchResult = await reduceExecution({ toolName: "exec", command: "pwd && rg -n AssertionError src", @@ -721,20 +721,69 @@ describe("reduceExecution", () => { expect(searchResult.classification.matchedReducer).toBe("search/rg"); expect(searchResult.classification.matchedVia).toBe("effective"); expect(searchResult.classification.matchedCommand).toBe("rg -n AssertionError src"); + }); + + it("preserves all short output from a multi-command sequence", async () => { + const rawText = [ + "127.0.0.1 github.com", + "---dig:", + "140.82.121.4", + "---scutil:", + "DNS configuration", + ].join("\n"); - const firstCommandWins = await reduceExecution({ + const result = await reduceExecution({ toolName: "exec", - command: "cd repo && swift test && rg -n failure src", - combinedText: [ - "Test Case 'FooTests.testExample' failed (0.12 seconds).", - "Executed 1 test, with 1 failure (0 unexpected) in 0.12 (0.12) seconds", - ].join("\n"), - exitCode: 1, + command: "grep -i github /etc/hosts; echo '---dig:'; dig +short api.github.com @1.1.1.1; echo '---scutil:'; scutil --dns | head -1", + combinedText: rawText, + exitCode: 0, }); - expect(firstCommandWins.classification.matchedReducer).toBe("tests/swift-test"); - expect(firstCommandWins.classification.matchedVia).toBe("effective"); - expect(firstCommandWins.classification.matchedCommand).toBe("swift test"); + expect(result.classification.matchedReducer).toBe("generic/fallback"); + expect(result.inlineText).toBe(rawText); + expect(result.stats.ratio).toBe(1); + }); + + it("uses authoritative generic compaction for large multi-command output", async () => { + const rawText = Array.from({ length: 80 }, (_, index) => `output ${index + 1} ${"x".repeat(48)}`).join("\n"); + const result = await reduceExecution({ + toolName: "exec", + command: "grep -i github /etc/hosts; dig +short api.github.com @1.1.1.1", + combinedText: rawText, + exitCode: 0, + }, { + maxInlineChars: 240, + }); + + expect(result.classification.matchedReducer).toBe("generic/fallback"); + expect(result.inlineText).toContain("output 1"); + expect(result.inlineText).toContain("output 80"); + expect(result.inlineText).not.toContain("output 40"); + expect(result.compaction).toEqual({ + authoritative: true, + kinds: expect.arrayContaining(["head-tail-omission"]), + }); + }); + + it("does not summarize file inspection output from a multi-command sequence", async () => { + const rawText = [ + "{", + " \"name\": \"example\",", + " \"lockfileVersion\": 3,", + " \"packages\": {}", + "}", + "DONE", + ].join("\n"); + const result = await reduceExecution({ + toolName: "exec", + command: "cat package-lock.json; echo DONE", + combinedText: rawText, + exitCode: 0, + }); + + expect(result.classification.matchedReducer).toBe("generic/fallback"); + expect(result.inlineText).toBe(rawText); + expect(result.stats.ratio).toBe(1); }); it("keeps wrapped file inspection output verbatim under generic fallback", async () => {