diff --git a/src/core/command-identity.ts b/src/core/command-identity.ts index a329d65..808c59a 100644 --- a/src/core/command-identity.ts +++ b/src/core/command-identity.ts @@ -2,12 +2,39 @@ import { basename } from "node:path"; import type { ToolExecutionInput } from "../types.js"; -import { deriveCommandMatchCandidates, getSourcePriority, type CommandMatchCandidate } from "./command-match.js"; -import { stripLeadingCdPrefix, tokenizeCommand } from "./command-shell.js"; +import { deriveCommandMatchCandidates, getSourcePriority, type CommandMatchCandidate, unwrapShellRunner } from "./command-match.js"; +import { hasSequentialShellCommands, isCompoundShellCommand, stripLeadingCdPrefix, tokenizeCommand } from "./command-shell.js"; const FILE_CONTENT_INSPECTION_COMMANDS = new Set(["cat", "sed", "head", "tail", "nl", "bat", "batcat", "jq", "yq"]); const REPO_INVENTORY_COMMANDS = new Set(["find", "fd", "fdfind", "ls", "tree"]); +// ssh options that consume a separate value argument (per ssh(1)); needed to +// find where the destination ends and the remote command begins. 
+const SSH_OPTIONS_WITH_VALUES = new Set([
+  "-B",
+  "-b",
+  "-c",
+  "-D",
+  "-E",
+  "-e",
+  "-F",
+  "-I",
+  "-i",
+  "-J",
+  "-L",
+  "-l",
+  "-m",
+  "-O",
+  "-o",
+  "-P",
+  "-p",
+  "-Q",
+  "-R",
+  "-S",
+  "-W",
+  "-w",
+]);
+
 function getNormalizedArgv(input: Pick): string[] {
   if (input.argv?.length) {
     return input.argv;
@@ -110,6 +137,80 @@ function isGitShowFileContentArgv(argv: string[]): boolean {
   return false;
 }
 
+/**
+ * Reports whether argv is a plutil call that prints plist content: either
+ * `-p` is present, or `-o` is immediately followed by `-`.
+ */
+function isPlutilFileContentArgv(argv: string[]): boolean {
+  if (getCommandName(argv) !== "plutil") {
+    return false;
+  }
+  if (argv.includes("-p")) {
+    return true;
+  }
+  const outputIndex = argv.indexOf("-o");
+  return outputIndex !== -1 && argv[outputIndex + 1] === "-";
+}
+
+/** Reports whether argv is an `openclaw config get ...` invocation. */
+function isReadOnlyConfigInspectionArgv(argv: string[]): boolean {
+  return getCommandName(argv) === "openclaw"
+    && argv[1] === "config"
+    && argv[2] === "get";
+}
+
+/**
+ * Reports whether an ssh option token takes its value from the next argv entry.
+ *
+ * Short options may be bundled ("-tp 2222"), so the token is scanned letter by
+ * letter up to the first value-taking option. That option consumes the next
+ * argv entry only when it is the last letter; otherwise the rest of the token
+ * is its value ("-p2222").
+ */
+function sshOptionConsumesNextArg(option: string): boolean {
+  if (option.startsWith("--")) {
+    return false;
+  }
+  for (let offset = 1; offset < option.length; offset += 1) {
+    if (SSH_OPTIONS_WITH_VALUES.has(`-${option.charAt(offset)}`)) {
+      return offset === option.length - 1;
+    }
+  }
+  return false;
+}
+
+/**
+ * Extracts the remote command from an ssh argv.
+ *
+ * Options and the values they consume are skipped; the first remaining
+ * argument is treated as the destination and everything after it is joined
+ * with single spaces. Returns null when argv is not an ssh call or nothing
+ * follows the destination.
+ */
+function getSshRemoteCommand(argv: string[]): string | null {
+  if (getCommandName(argv) !== "ssh") {
+    return null;
+  }
+
+  for (let index = 1; index < argv.length; index += 1) {
+    const arg = argv[index];
+    if (!arg || arg === "--") {
+      continue;
+    }
+    if (arg.startsWith("-")) {
+      if (sshOptionConsumesNextArg(arg)) {
+        index += 1;
+      }
+      continue;
+    }
+
+    const remoteCommand = argv.slice(index + 1).join(" ").trim();
+    return remoteCommand || null;
+  }
+
+  return null;
+}
+
 export function isFileContentInspectionArgv(argv: string[]): boolean {
   const argv0 = getCommandName(argv);
   if (!argv0) {
@@ -216,6 +317,56 @@ export function isFileContentInspectionCommand(input: Pick isGhApiContentsDecodeCommand(candidate.command));
 }
 
+/**
+ * Reports whether an ssh remote command is a single inspection command: a lone
+ * `gh api ... | base64 -d/-D/--decode` pipeline, or a non-compound plutil,
+ * `openclaw config get`, or file-content inspection command. A shell runner
+ * wrapper is unwrapped first when unwrapShellRunner recognises one.
+ */
+function isVerbatimRemoteInspectionCommand(command: string): boolean {
+  const effectiveCommand = unwrapShellRunner({ command }) ?? command;
+  const isSingleGhContentsDecode = isGhApiContentsDecodeCommand(effectiveCommand)
+    && !hasSequentialShellCommands(effectiveCommand)
+    && /^[^|]+\|\s*base64\s+(?:-[dD]\b|--decode\b)\s*$/u.test(effectiveCommand.trim());
+  if (isSingleGhContentsDecode) {
+    return true;
+  }
+  if (
+    isCompoundShellCommand(stripLeadingCdPrefix(command))
+    || isCompoundShellCommand(effectiveCommand)
+  ) {
+    return false;
+  }
+
+  const argv = getInspectionArgv({ command: effectiveCommand });
+  return isPlutilFileContentArgv(argv)
+    || isReadOnlyConfigInspectionArgv(argv)
+    || isFileContentInspectionArgv(argv);
+}
+
+/**
+ * Reports whether an execution's output should be kept verbatim: one of the
+ * derived match candidates is a plutil or `openclaw config get` call, or an ssh
+ * call whose remote command is itself a single inspection command. A compound
+ * top-level command (after stripping a leading cd prefix) never qualifies.
+ */
+export function isVerbatimConfigInspectionCommand(input: Pick): boolean {
+  if (input.command && isCompoundShellCommand(stripLeadingCdPrefix(input.command))) {
+    return false;
+  }
+
+  const candidates = deriveCommandMatchCandidates(input);
+  return candidates.some((candidate) => (
+    isPlutilFileContentArgv(candidate.argv)
+    || isReadOnlyConfigInspectionArgv(candidate.argv)
+  ))
+    || candidates.some((candidate) => {
+      const remoteCommand = getSshRemoteCommand(candidate.argv);
+      return remoteCommand !== null
+        && isVerbatimRemoteInspectionCommand(remoteCommand);
+    });
+}
+
 export function isRepositoryInspectionCommand(input: Pick): boolean {
   return isRepositoryInspectionArgv(getMostDerivedCandidate(input).argv);
 }
diff --git a/src/core/command.ts b/src/core/command.ts
index 8700eec..8bfd2d1 100644
--- a/src/core/command.ts
+++ b/src/core/command.ts
@@ -22,6 +22,7 @@ export {
   getGitSubcommand,
   isFileContentInspectionCommand,
   isRepositoryInspectionCommand,
+  isVerbatimConfigInspectionCommand,
   normalizeCommandSignature,
   normalizeEffectiveCommandSignature,
 } from "./command-identity.js";
diff --git a/src/core/reduce.ts b/src/core/reduce.ts
index 4adbb7a..46d8f6a 100644
--- a/src/core/reduce.ts
+++ b/src/core/reduce.ts
@@ -1,7 +1,7 @@
 import { loadRules } from "./rules.js";
 import { hasMultipleSubstantiveShellCommands } from "./command-match.js";
 import { classifyExecution, resolveRuleMatch } from "./classify.js";
-import { isFileContentInspectionCommand } from "./command-identity.js";
+import { isFileContentInspectionCommand, isVerbatimConfigInspectionCommand } from "./command-identity.js";
 import { normalizeExecutionInput } from "./execution-input.js";
 import { clampTextMiddleWithMetadata, clampTextWithMetadata, countTextChars, dedupeAdjacent, headTail, normalizeLines, pluralize, stripAnsi, trimEmptyEdges } from "./text.js";
 import { storeArtifact, storeArtifactMetadata } from "./artifacts.js";
@@ -354,7 +354,9 @@ export async function reduceExecutionWithRules(
       }
     : undefined;
 
-  if (opts.raw) {
+  const requiresVerbatimOutput = !multipleSubstantiveCommands
+    && isVerbatimConfigInspectionCommand(input);
+  if (opts.raw || requiresVerbatimOutput) {
     const rawRef = opts.store
       ? await storeArtifact(
           {
diff --git a/test/core/command.test.ts b/test/core/command.test.ts
index cdeef35..962a7a3 100644
--- a/test/core/command.test.ts
+++ b/test/core/command.test.ts
@@ -7,6 +7,7 @@ import {
   hasSequentialShellCommands,
   isFileContentInspectionCommand,
   isRepositoryInspectionCommand,
+  isVerbatimConfigInspectionCommand,
   normalizeCommandSignature,
   normalizeEffectiveCommandSignature,
   normalizeExecutionInput,
@@ -443,6 +444,43 @@ describe("isFileContentInspectionCommand", () => {
   it("does not treat git show commit summaries as file inspection", () => {
     expect(isFileContentInspectionCommand({ command: "git show HEAD --stat" })).toBe(false);
   });
+
+});
+
+describe("isVerbatimConfigInspectionCommand", () => {
+  it.each([
+    { label: "plutil print", command: "plutil -p /Library/LaunchDaemons/com.example.daemon.plist" },
+    { label: "plutil convert to stdout", command: "plutil -convert json -o - settings.plist" },
+    { label: "read-only config get", command: "openclaw config get agents.defaults" },
+    { label: "ssh-wrapped cat", command: "ssh build-host 'cat /etc/hosts'" },
+    { label: "ssh-wrapped cat with compression", command: "ssh -C build-host 'cat /etc/hosts'" },
+    { label: "ssh-wrapped cat with cipher", command: "ssh -c aes128-ctr build-host 'cat /etc/hosts'" },
+    { label: "ssh-wrapped cat with bind interface", command: "ssh -B en0 build-host 'cat /etc/hosts'" },
+    { label: "ssh-wrapped cat with tag", command: "ssh -P audit build-host 'cat /etc/hosts'" },
+    { label: "ssh-wrapped cat with bundled options", command: "ssh -tp 2222 build-host 'cat /etc/hosts'" },
+    { label: "ssh-wrapped cat with attached option value", command: "ssh -p2222 build-host 'cat /etc/hosts'" },
+    { label: "ssh-wrapped shell runner", command: "ssh build-host \"bash -lc 'cat /etc/hosts'\"" },
+    { label: "ssh-wrapped plutil with ssh options", command: "ssh -p 2222 -i ~/.ssh/id_ed25519 build-host 'plutil -p /Library/LaunchDaemons/com.example.daemon.plist'" },
+    { label: "ssh-wrapped read-only config get", command: "ssh build-host 'openclaw config get gateway'" },
+    { label: "ssh-wrapped gh contents decode", command: "ssh build-host 'gh api repos/o/r/contents/file --jq .content | base64 --decode'" },
+  ])("detects $label as a verbatim config inspection", ({ command }) => {
+    expect(isVerbatimConfigInspectionCommand({ command })).toBe(true);
+  });
+
+  it.each([
+    "plutil -convert binary1 settings.plist",
+    "openclaw config set agents.defaults.model test",
+    "ssh build-host 'rm -rf /tmp/scratch'",
+    "ssh -tp 2222 build-host 'rm -rf /tmp/scratch'",
+    "ssh build-host",
+    "ssh build-host 'cat /etc/hosts && pytest -q'",
+    "ssh build-host \"bash -lc 'cat /etc/hosts; pytest -q'\"",
+    "ssh build-host 'gh api repos/o/r/contents/file --jq .content | base64 --decode; pytest -q'",
+    "bash -lc 'openclaw config get gateway' && pytest -q",
+    "ssh build-host \"bash -lc 'cat /etc/hosts' && pytest -q\"",
+  ])("does not treat `%s` as a verbatim config inspection", (command) => {
+    expect(isVerbatimConfigInspectionCommand({ command })).toBe(false);
+  });
 });
 
 describe("isRepositoryInspectionCommand", () => {
diff --git a/test/core/reduce.test.ts b/test/core/reduce.test.ts
index c38d2fe..ef1497e 100644
--- a/test/core/reduce.test.ts
+++ b/test/core/reduce.test.ts
@@ -416,6 +416,67 @@ describe("reduceExecution", () => {
     expect(result.stats.ratio).toBe(1);
   });
 
+  it("keeps plutil plist dumps verbatim", async () => {
+    const rawText = [
+      "{",
+      ...Array.from({ length: 80 }, (_, index) => ` "Key${index + 1}" => "${"value ".repeat(12)}${index + 1}"`),
+      "}",
+    ].join("\n");
+
+    const result = await reduceExecution({
+      toolName: "exec",
+      command: "plutil -p /Library/LaunchDaemons/com.example.daemon.plist",
+      argv: ["plutil", "-p", "/Library/LaunchDaemons/com.example.daemon.plist"],
+      stdout: rawText,
+      exitCode: 0,
+    });
+
+    expect(result.inlineText).toBe(rawText);
+    expect(result.stats.ratio).toBe(1);
+  });
+
+  it("keeps read-only config inspection output verbatim", async () => {
+    const rawText = Array.from({ length: 80 }, (_, index) => `setting-${index + 1}: ${"value ".repeat(12)}${index + 1}`).join("\n");
+
+    const result = await reduceExecution({
+      toolName: "exec",
+      command: "openclaw config get agents.defaults",
+      stdout: rawText,
+      exitCode: 0,
+    });
+
+    expect(result.inlineText).toBe(rawText);
+    expect(result.stats.ratio).toBe(1);
+  });
+
+  it("keeps ssh-wrapped file inspection output verbatim", async () => {
+    const rawText = Array.from({ length: 80 }, (_, index) => `host-line ${index + 1} ${"value ".repeat(12)}`).join("\n");
+
+    const result = await reduceExecution({
+      toolName: "exec",
+      command: "ssh build-host 'cat /var/log/app.log'",
+      stdout: rawText,
+      exitCode: 0,
+    });
+
+    expect(result.inlineText).toBe(rawText);
+    expect(result.stats.ratio).toBe(1);
+  });
+
+  it("keeps ssh-wrapped gh contents decode output verbatim", async () => {
+    const rawText = Array.from({ length: 80 }, (_, index) => `file-line ${index + 1} ${"value ".repeat(12)}`).join("\n");
+
+    const result = await reduceExecution({
+      toolName: "exec",
+      command: "ssh build-host 'gh api repos/o/r/contents/file --jq .content | base64 --decode'",
+      stdout: rawText,
+      exitCode: 0,
+    });
+
+    expect(result.inlineText).toBe(rawText);
+    expect(result.stats.ratio).toBe(1);
+  });
+
   it("still compacts filesystem inventory commands through their dedicated reducers", async () => {
     const result = await reduceExecution({
       toolName: "exec",