diff --git a/src/analyzers/engine.ts b/src/analyzers/engine.ts index c03908e..bdc87e8 100644 --- a/src/analyzers/engine.ts +++ b/src/analyzers/engine.ts @@ -29,6 +29,10 @@ const TEST_FILE_PATTERNS = [ /\/examples?\//, /\/docs?\//, /\/tutorials?\//, /\/notebooks?\//, /\/demo\//, /\/samples?\//, /\/quickstart\//, /\/cookbook\//, /\/benchmarks?\//, /\/e2e\//, /\/integration_tests?\//, + /\/_test_/, /\/testing\//, /\/testutils\//, /\/testdata\//, + /\/\.github\//, /\/scripts\/ci\//, /\/\.circleci\//, + /\/mocks?\//, /\/stubs?\//, /\/__mocks__\//, + /_tests?\.\w+$/, /Tests?\.\w+$/, ]; const TEST_SEVERITY_DOWNGRADE: Record = { @@ -40,6 +44,23 @@ export function isTestFile(filePath: string): boolean { return TEST_FILE_PATTERNS.some(p => p.test(filePath)); } +// Files that are framework/library source code — when scanning the framework +// repo itself, these are building blocks, not deployed agent code. +const FRAMEWORK_LIB_PATTERNS = [ + /\/langchain_core\//, /\/langchain_community\//, + /\/langchain_classic\//, /\/langchain_v1\//, + /\/crewai\/src\/crewai\//, /\/crewai-tools\/src\//, + /\/autogen_agentchat\//, /\/autogen_ext\//, /\/autogen_core\//, + /\/site-packages\//, /\/node_modules\//, + /\/libs\/core\//, /\/libs\/langchain\//, + /\/packages\/core\/src\//, /\/packages\/langchain\/src\//, + /\/packages\/openai\/src\//, /\/packages\/anthropic\/src\//, +]; + +function isFrameworkLibFile(filePath: string): boolean { + return FRAMEWORK_LIB_PATTERNS.some(p => p.test(filePath)); +} + export interface AnalysisOptions { excludeRules?: string[]; onlyRules?: string[]; @@ -154,7 +175,8 @@ export function runAnalysis(graph: AgentGraph, options?: AnalysisOptions): Findi } } - // Filter test-file findings: remove medium/low/info noise, keep critical/high downgraded + // Filter test-file findings: test/example/doc/CI files are not agent code + // and produce massive FP volumes on real-world repos. 
if (options?.showAll) { // --show-all: just downgrade, don't filter for (const f of result) { @@ -165,16 +187,19 @@ export function runAnalysis(graph: AgentGraph, options?: AnalysisOptions): Findi } } } else { - result = result.filter(f => { - if (!isTestFile(f.location.file)) return true; - // Remove medium/low/info test findings entirely - if (f.severity !== 'critical' && f.severity !== 'high') return false; - // Keep critical/high but downgrade them - const downgraded = TEST_SEVERITY_DOWNGRADE[f.severity]; - if (downgraded) f.severity = downgraded; - f.confidence = 'low'; - return true; - }); + // Remove ALL test-file findings by default — they generate noise without + // actionable security value. Here only showAll keeps them; NOTE(review): confirm --include-tests is honored by the caller. + result = result.filter(f => !isTestFile(f.location.file)); + } + + // Downgrade framework library internals — when scanning the framework repo + // itself, findings on library plumbing code are not actionable. + if (!options?.showAll) { + for (const f of result) { + if (isFrameworkLibFile(f.location.file)) { + f.confidence = 'low'; + } + } } // Detect compensating controls nearby and downgrade severity diff --git a/src/analyzers/reachability.ts b/src/analyzers/reachability.ts index 9f46927..483207e 100644 --- a/src/analyzers/reachability.ts +++ b/src/analyzers/reachability.ts @@ -96,6 +96,7 @@ export function buildReachabilityIndex(graph: AgentGraph): ReachabilityIndex { /\/api\//i, /\/endpoints?\//i, /\/views?\//i, /app\.(get|post|put|delete|patch)\b/, /router\.(get|post|put|delete|patch)\b/, + /server\.(py|ts|js)$/i, // MCP/agent server entry points (file, not dir) ]; for (const fileInfo of graph.files.all) { diff --git a/src/analyzers/rules/cascading-failures.ts b/src/analyzers/rules/cascading-failures.ts index 2371c14..dd4838a 100644 --- a/src/analyzers/rules/cascading-failures.ts +++ b/src/analyzers/rules/cascading-failures.ts @@ -1162,7 +1162,7 @@ export const cascadingFailuresRules: Rule[] = [ id: 'AA-CF-056', name: 'No 
compute time limit', domain: 'cascading-failures', - severity: 'high', + severity: 'medium', confidence: 'medium', description: 'Long-running agent tasks lack a timeout or compute time limit.', frameworks: ['all'], @@ -1170,21 +1170,28 @@ export const cascadingFailuresRules: Rule[] = [ standards: STD, check: (graph: AgentGraph): Finding[] => { const findings: Finding[] = []; - for (const agent of graph.agents) { - if (!agent.resourceLimits?.hasTimeoutLimit && !agent.errorHandling?.hasTimeout) { - findings.push({ - id: `AA-CF-056-${findings.length}`, - ruleId: 'AA-CF-056', - title: 'No compute time limit', - description: `Agent "${agent.name}" in ${agent.file} has no timeout or compute time limit configured.`, - severity: 'high', - confidence: 'medium', - domain: 'cascading-failures', - location: { file: agent.file, line: agent.line }, - remediation: 'Set a timeout or max execution time for agent tasks to prevent runaway computation.', - standards: STD, - }); - } + // Collect the agents that have neither a resource-limit timeout nor an + // error-handling timeout. If every agent has one, nothing is reported; + // otherwise findings are emitted per agent, capped to limit noise. 
+ const agentsWithoutTimeout = graph.agents.filter( + a => !a.resourceLimits?.hasTimeoutLimit && !a.errorHandling?.hasTimeout, + ); + if (agentsWithoutTimeout.length === 0) return findings; + + // Emit at most 3 findings to avoid flooding + for (const agent of agentsWithoutTimeout.slice(0, 3)) { + findings.push({ + id: `AA-CF-056-${findings.length}`, + ruleId: 'AA-CF-056', + title: 'No compute time limit', + description: `Agent "${agent.name}" in ${agent.file} has no timeout or compute time limit configured.`, + severity: 'medium', + confidence: 'medium', + domain: 'cascading-failures', + location: { file: agent.file, line: agent.line }, + remediation: 'Set a timeout or max execution time for agent tasks to prevent runaway computation.', + standards: STD, + }); } return findings; }, diff --git a/src/analyzers/rules/goal-integrity.ts b/src/analyzers/rules/goal-integrity.ts index 9400265..c4886f4 100644 --- a/src/analyzers/rules/goal-integrity.ts +++ b/src/analyzers/rules/goal-integrity.ts @@ -2045,8 +2045,8 @@ export const goalIntegrityRules: Rule[] = [ id: 'AA-GI-059', name: 'Conflicting goals between cooperating agents', domain: 'goal-integrity', - severity: 'high', - confidence: 'medium', + severity: 'info', + confidence: 'low', description: 'No detection for conflicting goals between cooperating agents in a multi-agent system.', frameworks: ['all'], owaspAgentic: ['ASI01', 'ASI07'], @@ -2063,7 +2063,7 @@ export const goalIntegrityRules: Rule[] = [ id: `AA-GI-059-${findings.length}`, ruleId: 'AA-GI-059', title: 'Conflicting goals between cooperating agents', description: `Multi-agent system in ${file.relativePath} has no conflict detection between cooperating agent goals.`, - severity: 'high', confidence: 'medium', domain: 'goal-integrity', + severity: 'info', confidence: 'low', domain: 'goal-integrity', location: { file: file.relativePath, line }, remediation: 'Add goal conflict detection between cooperating agents. 
Reconcile or arbitrate conflicting goals before execution.', standards: { owaspAgentic: ['ASI01', 'ASI07'], iso23894: ['R.2', 'R.4'], owaspAivss: ['AIVSS-GD'], owaspAgenticTop10: ['ISOL', 'DELE'] }, diff --git a/src/discovery/graph.ts b/src/discovery/graph.ts index e46d18a..72c9bbe 100644 --- a/src/discovery/graph.ts +++ b/src/discovery/graph.ts @@ -46,9 +46,10 @@ export function buildAgentGraph( includeTests = false, existingAstStore?: ASTStore, ): AgentGraph { - // Parsers get filtered files (no test fixtures), but graph.files stays unfiltered - // so code_matches rules can still scan test files (with severity downgrade) - const parserFiles = includeTests ? files : filterTestFiles(files); + // Filter test files for parsers (don't register test agents/tools). + // graph.files stays unfiltered so YAML rules can scan all source files; + // the engine handles test-file findings with severity downgrade/removal. + const filteredFiles = includeTests ? files : filterTestFiles(files); // Reuse existing AST store or create a new one const astStore = existingAstStore ?? 
new ASTStore(); @@ -90,34 +91,34 @@ export function buildAgentGraph( for (const framework of frameworks) { switch (framework) { case 'langchain': - parseLangChain(graph, parserFiles); + parseLangChain(graph, filteredFiles); break; case 'crewai': - parseCrewAI(graph, parserFiles); + parseCrewAI(graph, filteredFiles); break; case 'mcp': - parseMCP(graph, parserFiles); + parseMCP(graph, filteredFiles); break; case 'openai': - parseOpenAI(graph, parserFiles); + parseOpenAI(graph, filteredFiles); break; case 'vercel-ai': - parseVercelAI(graph, parserFiles); + parseVercelAI(graph, filteredFiles); break; case 'bedrock': - parseBedrock(graph, parserFiles); + parseBedrock(graph, filteredFiles); break; case 'autogen': - parseAutoGen(graph, parserFiles); + parseAutoGen(graph, filteredFiles); break; case 'langchain4j': - parseLangChain4j(graph, parserFiles); + parseLangChain4j(graph, filteredFiles); break; case 'spring-ai': - parseSpringAI(graph, parserFiles); + parseSpringAI(graph, filteredFiles); break; case 'golang-ai': - parseGolangAI(graph, parserFiles); + parseGolangAI(graph, filteredFiles); break; } } diff --git a/src/intelligence/cve-feed.ts b/src/intelligence/cve-feed.ts index d916443..d5a1c15 100644 --- a/src/intelligence/cve-feed.ts +++ b/src/intelligence/cve-feed.ts @@ -15,6 +15,7 @@ export interface CVEEntry { cvss: number; description: string; affectedVersions: string[]; + affectedPackages?: string[]; fixedIn?: string; references: string[]; source: 'openclaw-advisory' | 'ghsa' | 'manual'; @@ -35,6 +36,7 @@ const KNOWN_CVES: CVEEntry[] = [ cvss: 9.9, description: 'safeBins GNU abbreviation bypass allows arbitrary command execution via abbreviated tool names that bypass the allowlist', affectedVersions: ['< 0.12.4'], + affectedPackages: ['openclaw'], fixedIn: '0.12.4', references: ['https://openclaw.ai/security/advisories/CVE-2026-28363'], source: 'openclaw-advisory', @@ -88,8 +90,17 @@ export async function fetchCVEFeed(opts?: { cacheDir?: string; forceRefresh?: 
bo /** * Check if a given OpenClaw version is vulnerable to any known CVEs */ -export function checkVersionVulnerable(version: string, cves: CVEEntry[]): CVEEntry[] { +export function checkVersionVulnerable(version: string, cves: CVEEntry[], packageName?: string): CVEEntry[] { + // Skip non-numeric version placeholders (Maven ${project.version}, workspace:*, etc.) + if (!version || !/^\d/.test(version.replace(/^v/, ''))) return []; + return cves.filter(cve => { + // If CVE specifies affected packages, skip unless the package matches + if (cve.affectedPackages && cve.affectedPackages.length > 0 && packageName) { + const pkgLower = packageName.toLowerCase(); + if (!cve.affectedPackages.some(p => pkgLower.includes(p.toLowerCase()))) return false; + } + for (const affected of cve.affectedVersions) { // Parse version constraint like "< 0.12.4" or "<= 0.11.0" const match = affected.match(/^([<>=!]+)\s*(.+)$/); diff --git a/src/pipeline.ts b/src/pipeline.ts index 80ecc2a..bd9bf94 100644 --- a/src/pipeline.ts +++ b/src/pipeline.ts @@ -153,7 +153,12 @@ export async function runScan(options: ScanOptions): Promise { if (analyzersConfig?.intelligence !== false) { try { const intelligenceFindings = await runIntelligenceChecks(graph); - findings.push(...intelligenceFindings); + // Filter out intelligence findings in test files (e.g., IOC test domains in SSRF tests) + const { isTestFile } = await import('./analyzers/engine.js'); + const filtered = options.includeTests + ? 
intelligenceFindings + : intelligenceFindings.filter(f => !isTestFile(f.location.file)); + findings.push(...filtered); } catch { // Intelligence checks are purely additive; failures don't break the scan } @@ -170,11 +175,16 @@ export async function runScan(options: ScanOptions): Promise { } // Step 4.6: Suppress utility-code + unlikely findings (unless --show-all) - // Only suppress when the graph has detected agents/tools — otherwise the - // reachability index is uninformative and everything defaults to utility-code + // Suppress when: + // (a) the graph has detected agents/tools (reachability is informative), OR + // (b) > 80% of findings are utility-code (parsers missed but most code is irrelevant) + // This prevents huge finding counts in large repos where framework parsers + // don't detect entry points but the codebase is clearly not all agent code. let suppressedCount = 0; const hasEntryPoints = graph.agents.length > 0 || graph.tools.length > 0; - if (!options.showAll && hasEntryPoints) { + const utilityCodeCount = findings.filter(f => f.reachability === 'utility-code' && f.exploitability === 'unlikely').length; + const utilityCodeRatio = findings.length > 0 ? 
utilityCodeCount / findings.length : 0; + if (!options.showAll && (hasEntryPoints || utilityCodeRatio > 0.8)) { const before = findings.length; findings = findings.filter(f => !(f.reachability === 'utility-code' && f.exploitability === 'unlikely')); @@ -325,7 +335,7 @@ async function runIntelligenceChecks(graph: AgentGraph): Promise { for (const fw of graph.frameworkVersions) { if (!fw.version) continue; - const vulnerable = checkVersionVulnerable(fw.version, cves); + const vulnerable = checkVersionVulnerable(fw.version, cves, fw.name); for (const cve of vulnerable) { findings.push(cveToFinding(cve, fw, findingIndex++)); } diff --git a/src/rules/builtin/data-leakage/no-data-classification.yaml b/src/rules/builtin/data-leakage/no-data-classification.yaml index 43f1452..11b941c 100644 --- a/src/rules/builtin/data-leakage/no-data-classification.yaml +++ b/src/rules/builtin/data-leakage/no-data-classification.yaml @@ -2,8 +2,8 @@ id: AA-DL-102 info: name: "No data classification for agent inputs" domain: data-leakage - severity: medium - confidence: medium + severity: info + confidence: low description: "Agent does not classify input data by sensitivity level. Without classification, sensitive data receives the same handling as public data, increasing leakage risk." 
frameworks: [all] owasp_agentic: [ASI07] diff --git a/src/rules/builtin/human-oversight/AA-HO-075.yaml b/src/rules/builtin/human-oversight/AA-HO-075.yaml index dd4b658..3465a1c 100644 --- a/src/rules/builtin/human-oversight/AA-HO-075.yaml +++ b/src/rules/builtin/human-oversight/AA-HO-075.yaml @@ -20,4 +20,5 @@ check: pattern: 'fetch|axios|requests\.(get|post|put|delete|patch)|http\.request|urllib|httpx' context: not_in: [comment, string_literal, decorator] + file_not_matches: 'mcp_server_fetch|fetch.server|fetch-server|server_fetch' message: "External API call made without human review gate" diff --git a/src/rules/builtin/identity-access/no-audit-trail.yaml b/src/rules/builtin/identity-access/no-audit-trail.yaml index dedcacd..95786c9 100644 --- a/src/rules/builtin/identity-access/no-audit-trail.yaml +++ b/src/rules/builtin/identity-access/no-audit-trail.yaml @@ -2,8 +2,8 @@ id: AA-IA-096 info: name: "No audit trail for agent actions" domain: identity-access - severity: medium - confidence: medium + severity: info + confidence: low description: "Agent actions are not logged with user identity, timestamp, and action details. Without an audit trail, unauthorized access cannot be detected or investigated." frameworks: [all] owasp_agentic: [ASI03] diff --git a/src/rules/builtin/memory-context/context-no-truncation-strategy.yaml b/src/rules/builtin/memory-context/context-no-truncation-strategy.yaml index 21cb9f0..a6437be 100644 --- a/src/rules/builtin/memory-context/context-no-truncation-strategy.yaml +++ b/src/rules/builtin/memory-context/context-no-truncation-strategy.yaml @@ -2,8 +2,8 @@ id: AA-MP-086 info: name: "No context truncation strategy" domain: memory-context - severity: medium - confidence: medium + severity: info + confidence: low description: "Agent does not implement a context truncation strategy. When the context window fills up, the system may silently truncate from the beginning, losing critical system instructions." 
frameworks: [all] owasp_agentic: [ASI06] diff --git a/src/rules/builtin/rogue-agent/AA-RA-041.yaml b/src/rules/builtin/rogue-agent/AA-RA-041.yaml index 5ddb9e5..1eeb71a 100644 --- a/src/rules/builtin/rogue-agent/AA-RA-041.yaml +++ b/src/rules/builtin/rogue-agent/AA-RA-041.yaml @@ -19,4 +19,5 @@ check: pattern: 'requests\.get|fetch|urllib\.request|http\.request|axios|socket\.connect|net\.createConnection' context: not_in: [comment, string_literal, decorator] + file_not_matches: 'mcp_server_fetch|fetch.server|fetch-server|server_fetch' message: "Agent has unrestricted network access from sandbox" diff --git a/src/rules/builtin/rogue-agent/AA-RA-068.yaml b/src/rules/builtin/rogue-agent/AA-RA-068.yaml index c1026e5..d29fef7 100644 --- a/src/rules/builtin/rogue-agent/AA-RA-068.yaml +++ b/src/rules/builtin/rogue-agent/AA-RA-068.yaml @@ -2,8 +2,8 @@ id: AA-RA-068 info: name: "Missing behavioral bounds" domain: rogue-agent - severity: high - confidence: medium + severity: info + confidence: low description: "Agent does not define explicit bounds on its behavioral parameters such as maximum tool calls per turn, maximum tokens generated, or maximum actions per session. Unbounded agents can exhibit runaway behavior." 
frameworks: [all] owasp_agentic: [ASI10] diff --git a/src/rules/builtin/tool-safety/tool-cron-scheduling.yaml b/src/rules/builtin/tool-safety/tool-cron-scheduling.yaml index 0011ea4..907f9fa 100644 --- a/src/rules/builtin/tool-safety/tool-cron-scheduling.yaml +++ b/src/rules/builtin/tool-safety/tool-cron-scheduling.yaml @@ -11,11 +11,11 @@ info: nist_ai_rmf: [GOVERN-1.2] check: type: ast_matches - language: any + language: python node_type: "call_expression|call|identifier" filters: - field: function - pattern: 'crontab|schedule|setInterval|setTimeout|APScheduler|celery\.task' + pattern: 'crontab|schedule|APScheduler|celery\.task|BackgroundScheduler' context: not_in: [comment, string_literal, decorator] message: "Tool can schedule tasks — persistence mechanism risk" diff --git a/src/rules/yaml-compiler.ts b/src/rules/yaml-compiler.ts index cffa0ef..b8ec8cd 100644 --- a/src/rules/yaml-compiler.ts +++ b/src/rules/yaml-compiler.ts @@ -193,7 +193,11 @@ function buildCheckFunction(yaml: YamlRule): (graph: AgentGraph) => Finding[] { if (domain === 'inter-agent' && graph.agents.length < 2) return findings; const regex = new RegExp(check.pattern, 'gm'); const fileList = getFilesForLanguage(graph, check.language); + const codeFileNotMatches = check.context?.file_not_matches + ? new RegExp(check.context.file_not_matches, 'i') + : null; for (const fileInfo of fileList) { + if (codeFileNotMatches && codeFileNotMatches.test(fileInfo.path)) continue; try { const content = graph.astStore?.getContent(fileInfo.path) ?? fs.readFileSync(fileInfo.path, 'utf-8'); // Skip very large files (likely generated/bundled — 500KB threshold) @@ -247,7 +251,12 @@ function buildCheckFunction(yaml: YamlRule): (graph: AgentGraph) => Finding[] { // These fire per-agent for missing properties that no framework defines, // so hundreds of agents = thousands of identical findings without a cap. 
const MAX_AGENT_PROPERTY_FINDINGS = 3; + // Filter agents by framework if the rule specifies non-'all' frameworks + const ruleFrameworks = yaml.info.frameworks ?? ['all']; + const filterByFramework = !ruleFrameworks.includes('all'); for (const agent of graph.agents) { + // Skip agents from frameworks this rule doesn't apply to + if (filterByFramework && agent.framework && !ruleFrameworks.includes(agent.framework)) continue; if (findings.length >= MAX_AGENT_PROPERTY_FINDINGS) break; // YAML rules can check arbitrary properties that frameworks may set at runtime const value = (agent as unknown as Record)[check.property]; @@ -404,8 +413,14 @@ function buildCheckFunction(yaml: YamlRule): (graph: AgentGraph) => Finding[] { const fileList = getFilesForLanguage(graph, check.language); const filters = check.filters ?? []; const excludeContexts = new Set(check.context?.not_in ?? ['comment', 'string_literal']); + const fileNotMatches = check.context?.file_not_matches + ? new RegExp(check.context.file_not_matches, 'i') + : null; for (const fileInfo of fileList) { + // Skip files matching exclusion pattern (e.g., MCP tool servers) + if (fileNotMatches && fileNotMatches.test(fileInfo.path)) continue; + // Get tree from ASTStore (preferred) or parse on-demand const tree = graph.astStore?.getTree(fileInfo.path) ?? getFileTree(fileInfo.path); if (!tree) continue;