From 42238adab077bd3f39f862f92501bfa3d71bfae8 Mon Sep 17 00:00:00 2001
From: Vincent Koc <25068+vincentkoc@users.noreply.github.com>
Date: Thu, 18 Jun 2026 16:19:02 +0800
Subject: [PATCH] fix(core): preserve compound command output

---
 src/cli/main.ts                 |  1 +
 src/core/command-match.ts       | 35 ++++++++++++++++
 src/core/command.ts             |  1 +
 src/core/compaction-metadata.ts |  2 +-
 src/core/reduce.ts              | 22 +++++++---
 test/cli/main.test.ts           | 27 +++++++++++++
 test/core/command.test.ts       | 21 ++++++++++
 test/core/reduce.test.ts        | 71 ++++++++++++++++++++++++++++-----
 8 files changed, 163 insertions(+), 17 deletions(-)

diff --git a/src/cli/main.ts b/src/cli/main.ts
index 6c2e3420..d2ca6d2c 100644
--- a/src/cli/main.ts
+++ b/src/cli/main.ts
@@ -723,6 +723,7 @@ export function decorateWrapInlineText(result: WrapResult["result"], raw: boolea
     "",
     "---",
     WRAP_AUTHORITATIVE_FOOTER,
+    ...(result.rawRef?.id ? [`Raw output is available locally: tokenjuice cat ${result.rawRef.id}`] : []),
   ].join("\n");
   return `${result.inlineText}${footer}`;
 }
diff --git a/src/core/command-match.ts b/src/core/command-match.ts
index ae9721d7..72db0475 100644
--- a/src/core/command-match.ts
+++ b/src/core/command-match.ts
@@ -2,6 +2,7 @@ import type { CommandMatchSource, ToolExecutionInput } from "../types.js";
 
 import {
   ENV_ASSIGNMENT_PATTERN,
+  hasSequentialShellCommands,
   isCompoundShellCommand,
   splitTopLevelCommandChain,
   stripLeadingEnvAssignmentsFromCommand,
@@ -523,6 +524,40 @@ export function resolveEffectiveCommand(input: Pick<ToolExecutionInput, "argv" |
   return null;
 }
 
+export function hasMultipleSubstantiveShellCommands(input: Pick<ToolExecutionInput, "argv" | "command">): boolean { // true when more than one segment of the command yields an effective candidate
+  const shellBody = unwrapShellRunner(input); // presumably the inner script of a shell-runner invocation (e.g. bash -lc '...'), nullish otherwise — confirm in unwrapShellRunner
+  const command = (shellBody ?? input.command ?? "").trim(); // prefer the unwrapped body, then the raw command; a missing command becomes ""
+  if (!command || !hasSequentialShellCommands(command)) { // nothing to inspect, or hasSequentialShellCommands reports no sequence
+    return false;
+  }
+
+  let effectiveCommand = command;
+  for (let iteration = 0; iteration < 16; iteration += 1) { // hard cap of 16 stripping passes so the loop always terminates
+    const setupIfTail = stripLeadingSetupIfBlock(effectiveCommand); // try the if-block stripper first
+    const setupSegmentTail = setupIfTail ?? stripLeadingSetupSegment(effectiveCommand); // fall back to the segment stripper only when the first result is nullish
+    if (!setupSegmentTail) { // any falsy result (including "") ends the stripping
+      break;
+    }
+    effectiveCommand = setupSegmentTail; // presumably the remainder after the stripped setup prefix — verify against the helpers
+  }
+
+  let substantiveCount = 0;
+  for (const sequenceSegment of splitTopLevelCommandChain(effectiveCommand)) { // outer split of what is left after stripping
+    for (const segment of splitTopLevelOrChain(sequenceSegment)) { // each outer segment is split again before counting
+      const candidate = buildEffectiveCandidate(tokenizeCommand(segment), true, segment); // NOTE(review): meaning of the `true` argument is defined in buildEffectiveCandidate, not visible here
+      if (!candidate) { // segments that yield no candidate are not counted
+        continue;
+      }
+      substantiveCount += 1;
+      if (substantiveCount > 1) { // stop as soon as a second candidate is found
+        return true;
+      }
+    }
+  }
+
+  return false; // zero or one counted segment
+}
+
 export function getEffectiveCommandArgv(input: Pick<ToolExecutionInput, "argv" | "command">): string[] {
   return resolveEffectiveCommand(input)?.argv ?? getCandidateArgv(input);
 }
diff --git a/src/core/command.ts b/src/core/command.ts
index a3d6730a..8700eeca 100644
--- a/src/core/command.ts
+++ b/src/core/command.ts
@@ -10,6 +10,7 @@ export type { CommandMatchCandidate } from "./command-match.js";
 export {
   deriveCommandMatchCandidates,
   getEffectiveCommandArgv,
+  hasMultipleSubstantiveShellCommands,
   isSetupWrapperSegment,
   resolveEffectiveCommand,
   stripLeadingEnvAssignments,
diff --git a/src/core/compaction-metadata.ts b/src/core/compaction-metadata.ts
index 73e76b0e..87cc209f 100644
--- a/src/core/compaction-metadata.ts
+++ b/src/core/compaction-metadata.ts
@@ -23,7 +23,7 @@ export const NO_COMPACTION_METADATA: CompactionMetadata = {
   kinds: [],
 };
 
-export const WRAP_AUTHORITATIVE_FOOTER = "[tokenjuice] This is the complete, authoritative output for this command. It was deterministically compacted to remove low-signal noise; the omitted content is not retrievable. Do not re-run the command, vary flags, or switch tools to try to recover it. Proceed with the task using this output.";
+export const WRAP_AUTHORITATIVE_FOOTER = "[tokenjuice] This is the complete, authoritative output for this command. It was deterministically compacted to remove low-signal noise. Do not re-run the command, vary flags, or switch tools to try to recover omitted content; use a raw-artifact recovery command below when one is provided. Proceed with the task using this output.";
 
 function buildCompactionMetadata(authoritative: boolean, ...kinds: CompactionKind[]): CompactionMetadata {
   if (kinds.length === 0) {
diff --git a/src/core/reduce.ts b/src/core/reduce.ts
index e5e0c9af..4adbb7a3 100644
--- a/src/core/reduce.ts
+++ b/src/core/reduce.ts
@@ -1,4 +1,5 @@
 import { loadRules } from "./rules.js";
+import { hasMultipleSubstantiveShellCommands } from "./command-match.js";
 import { classifyExecution, resolveRuleMatch } from "./classify.js";
 import { isFileContentInspectionCommand } from "./command-identity.js";
 import { normalizeExecutionInput } from "./execution-input.js";
@@ -323,12 +324,21 @@ export async function reduceExecutionWithRules(
     reducedChars,
     ratio: measuredRawChars === 0 ? 1 : reducedChars / measuredRawChars,
   });
-  const resolvedMatch = opts.classifier
+  const multipleSubstantiveCommands = !opts.classifier && hasMultipleSubstantiveShellCommands(input);
+  const resolvedMatch = opts.classifier || multipleSubstantiveCommands
     ? undefined
     : resolveRuleMatch(input, rules);
-  const classification = resolvedMatch?.classification
-    ?? classifyExecution(input, rules, opts.classifier);
-  const reducerInput = resolvedMatch?.candidateInput ?? normalizedInput;
+  const classification = multipleSubstantiveCommands
+    ? {
+        family: "generic",
+        confidence: 0.2,
+        matchedReducer: "generic/fallback",
+      }
+    : resolvedMatch?.classification
+      ?? classifyExecution(input, rules, opts.classifier);
+  const reducerInput = multipleSubstantiveCommands
+    ? normalizedInput
+    : resolvedMatch?.candidateInput ?? normalizedInput;
   const trace = opts.trace
     ? {
         ...(normalizedInput.command ? { normalizedCommand: normalizedInput.command } : {}),
@@ -390,7 +400,9 @@ export async function reduceExecutionWithRules(
     };
   }
 
-  const inspectionSummary = buildInspectionSummary(normalizedInput, rawText, opts.noOmit);
+  const inspectionSummary = multipleSubstantiveCommands
+    ? null
+    : buildInspectionSummary(normalizedInput, rawText, opts.noOmit);
   if (inspectionSummary) {
     const summaryText = inspectionSummary.lines.join("\n").trim();
     const selectedText = clampTextMiddleWithMetadata(summaryText, maxInlineChars, opts.noOmit);
diff --git a/test/cli/main.test.ts b/test/cli/main.test.ts
index e6ecb982..580a4fe3 100644
--- a/test/cli/main.test.ts
+++ b/test/cli/main.test.ts
@@ -51,6 +51,33 @@ describe("decorateWrapInlineText", () => {
     expect(decorateWrapInlineText(result, false)).toContain(WRAP_AUTHORITATIVE_FOOTER);
   });
 
+  it("provides the raw-artifact recovery command when output is stored", () => {
+    const result: CompactResult = {
+      inlineText: "summary",
+      compaction: {
+        authoritative: true,
+        kinds: ["head-tail-omission"],
+      },
+      rawRef: {
+        id: "tj_0123456789ab",
+        path: "/tmp/tokenjuice/raw.txt",
+        metadataPath: "/tmp/tokenjuice/meta.json",
+      },
+      stats: {
+        rawChars: 4_000,
+        reducedChars: 40,
+        ratio: 0.01,
+      },
+      classification: {
+        family: "generic",
+        confidence: 0.9,
+        matchedReducer: "generic/fallback",
+      },
+    };
+
+    expect(decorateWrapInlineText(result, false)).toContain("tokenjuice cat tj_0123456789ab");
+  });
+
   it("suppresses the authoritative footer for lossless rewrites", () => {
     const result: CompactResult = {
       inlineText: "summary",
diff --git a/test/core/command.test.ts b/test/core/command.test.ts
index 1ecf48c6..cdeef352 100644
--- a/test/core/command.test.ts
+++ b/test/core/command.test.ts
@@ -3,6 +3,7 @@ import { describe, expect, it } from "vitest";
 import {
   deriveCommandMatchCandidates,
   getGitSubcommand,
+  hasMultipleSubstantiveShellCommands,
   hasSequentialShellCommands,
   isFileContentInspectionCommand,
   isRepositoryInspectionCommand,
@@ -371,6 +372,26 @@ describe("hasSequentialShellCommands", () => {
   });
 });
 
+describe("hasMultipleSubstantiveShellCommands", () => { // table-driven checks of the boolean result for a bare `command` string
+  it.each([ // inputs expected to return true
+    "grep -i github /etc/hosts; echo '---dig:'; dig +short api.github.com @1.1.1.1; scutil --dns", // ";"-separated sequence
+    "cd repo && swift test && rg -n failure src", // "&&" chain starting with cd
+    "command -v rg || cargo install ripgrep; rg --files src", // "||" chain followed by ";"
+    "bash -lc 'grep -i github /etc/hosts; dig +short api.github.com @1.1.1.1'", // sequence passed through a bash -lc wrapper
+  ])("detects `%s` as multiple substantive commands", (command) => {
+    expect(hasMultipleSubstantiveShellCommands({ command })).toBe(true);
+  });
+
+  it.each([ // inputs expected to return false
+    "cd repo && pnpm test", // leading cd
+    "source .env && cargo test", // leading source
+    "if command -v tt >/dev/null 2>&1; then tt title 'tests'; else tmux select-pane -T 'tests' 2>/dev/null || true; fi; pnpm test", // leading if/then/else/fi block
+    "bash -lc 'cd repo && pnpm test'", // leading cd inside a bash -lc wrapper
+  ])("keeps setup-wrapped `%s` as one substantive command", (command) => {
+    expect(hasMultipleSubstantiveShellCommands({ command })).toBe(false);
+  });
+});
+
 describe("getGitSubcommand", () => {
   it.each([
     { command: "git ls-files src", subcommand: "ls-files" },
diff --git a/test/core/reduce.test.ts b/test/core/reduce.test.ts
index d619c9c6..c38d2fef 100644
--- a/test/core/reduce.test.ts
+++ b/test/core/reduce.test.ts
@@ -710,7 +710,7 @@ describe("reduceExecution", () => {
     }
   });
 
-  it("matches wrapped rg search commands and prefers the first substantive command in a chain", async () => {
+  it("matches wrapped rg search commands after setup commands", async () => {
     const searchResult = await reduceExecution({
       toolName: "exec",
       command: "pwd && rg -n AssertionError src",
@@ -721,20 +721,69 @@ describe("reduceExecution", () => {
     expect(searchResult.classification.matchedReducer).toBe("search/rg");
     expect(searchResult.classification.matchedVia).toBe("effective");
     expect(searchResult.classification.matchedCommand).toBe("rg -n AssertionError src");
+  });
+
+  it("preserves all short output from a multi-command sequence", async () => {
+    const rawText = [
+      "127.0.0.1 github.com",
+      "---dig:",
+      "140.82.121.4",
+      "---scutil:",
+      "DNS configuration",
+    ].join("\n");
 
-    const firstCommandWins = await reduceExecution({
+    const result = await reduceExecution({
       toolName: "exec",
-      command: "cd repo && swift test && rg -n failure src",
-      combinedText: [
-        "Test Case 'FooTests.testExample' failed (0.12 seconds).",
-        "Executed 1 test, with 1 failure (0 unexpected) in 0.12 (0.12) seconds",
-      ].join("\n"),
-      exitCode: 1,
+      command: "grep -i github /etc/hosts; echo '---dig:'; dig +short api.github.com @1.1.1.1; echo '---scutil:'; scutil --dns | head -1",
+      combinedText: rawText,
+      exitCode: 0,
     });
 
-    expect(firstCommandWins.classification.matchedReducer).toBe("tests/swift-test");
-    expect(firstCommandWins.classification.matchedVia).toBe("effective");
-    expect(firstCommandWins.classification.matchedCommand).toBe("swift test");
+    expect(result.classification.matchedReducer).toBe("generic/fallback");
+    expect(result.inlineText).toBe(rawText);
+    expect(result.stats.ratio).toBe(1);
+  });
+
+  it("uses authoritative generic compaction for large multi-command output", async () => {
+    const rawText = Array.from({ length: 80 }, (_, index) => `output ${index + 1} ${"x".repeat(48)}`).join("\n");
+    const result = await reduceExecution({
+      toolName: "exec",
+      command: "grep -i github /etc/hosts; dig +short api.github.com @1.1.1.1",
+      combinedText: rawText,
+      exitCode: 0,
+    }, {
+      maxInlineChars: 240,
+    });
+
+    expect(result.classification.matchedReducer).toBe("generic/fallback");
+    expect(result.inlineText).toContain("output 1");
+    expect(result.inlineText).toContain("output 80");
+    expect(result.inlineText).not.toContain("output 40");
+    expect(result.compaction).toEqual({
+      authoritative: true,
+      kinds: expect.arrayContaining(["head-tail-omission"]),
+    });
+  });
+
+  it("does not summarize file inspection output from a multi-command sequence", async () => {
+    const rawText = [
+      "{",
+      "  \"name\": \"example\",",
+      "  \"lockfileVersion\": 3,",
+      "  \"packages\": {}",
+      "}",
+      "DONE",
+    ].join("\n");
+    const result = await reduceExecution({
+      toolName: "exec",
+      command: "cat package-lock.json; echo DONE",
+      combinedText: rawText,
+      exitCode: 0,
+    });
+
+    expect(result.classification.matchedReducer).toBe("generic/fallback");
+    expect(result.inlineText).toBe(rawText);
+    expect(result.stats.ratio).toBe(1);
   });
 
   it("keeps wrapped file inspection output verbatim under generic fallback", async () => {