From a2b045e56408b5db3234878483484ef1c2bc3f52 Mon Sep 17 00:00:00 2001 From: Perseus Computing <51974392+tcconnally@users.noreply.github.com> Date: Sat, 20 Jun 2026 21:30:07 +0000 Subject: [PATCH] fix(scoring): apply 1.3x multiplier only to findings from executable files Previously the risk score multiplied ALL findings by 1.3x if any executable script existed in the skill, punishing documentation and teaching content that mentions attack patterns for educational or deny-list purposes. This change: - Builds a file-executable lookup from component_metadata - Applies 1.3x only to findings from executable files (.py, .sh, .js, etc.) - Leaves non-executable-file findings at base severity weight - No change when component_metadata is unavailable (backward compatible) Added test: doc_findings_no_multiplier verifies markdown findings are not inflated by the executable multiplier. Addresses the scoring concern in #103 Signed-off-by: Perseus Computing <51974392+tcconnally@users.noreply.github.com> --- src/skillspector/nodes/report.py | 26 +++++++++++----- tests/nodes/test_report.py | 53 +++++++++++++++++++++++++++++--- 2 files changed, 68 insertions(+), 11 deletions(-) diff --git a/src/skillspector/nodes/report.py b/src/skillspector/nodes/report.py index 3e0404e..81a6e79 100644 --- a/src/skillspector/nodes/report.py +++ b/src/skillspector/nodes/report.py @@ -87,7 +87,9 @@ def _severity_to_sarif_level(severity: str) -> Literal["error", "warning", "note def _compute_risk_score( - findings: list[Finding], has_executable_scripts: bool + findings: list[Finding], + has_executable_scripts: bool, + component_metadata: list[dict[str, object]] | None = None, ) -> tuple[int, str, str]: """ Compute risk score (0-100), severity band, and recommendation. @@ -105,7 +107,9 @@ def _compute_risk_score( order so that the highest-severity occurrence always receives the full weight. Base points per severity: CRITICAL=50, HIGH=25, MEDIUM=10, LOW=5. - Multiplier: 1.3x if has_executable_scripts. + 1.3x multiplier applied only to findings from executable script files; + findings from documentation files (markdown, text, json, yaml, toml) + are scored at base weight to avoid punishing security documentation. """ severity_rank = {"CRITICAL": 0, "HIGH": 1, "MEDIUM": 2, "LOW": 3} sorted_findings = sorted( @@ -113,6 +117,11 @@ def _compute_risk_score( key=lambda f: (f.rule_id or "UNKNOWN", severity_rank.get((f.severity or "LOW").upper(), 4)), ) + file_executable: dict[str, bool] = {} + if component_metadata: + for cm in component_metadata: + file_executable[str(cm.get("path", ""))] = bool(cm.get("executable", False)) + rule_occurrence_count: dict[str, int] = {} score = 0.0 @@ -132,10 +141,13 @@ def _compute_risk_score( continue weight = _DIMINISHING_WEIGHTS[count] - score += base_points * weight * confidence + contribution = base_points * weight * confidence + + # Apply 1.3x multiplier only to findings from executable files + if has_executable_scripts and file_executable.get(f.file, False): + contribution *= 1.3 - if has_executable_scripts: - score *= 1.3 + score += contribution final_score = min(100, max(0, int(score))) @@ -565,7 +577,7 @@ def report(state: SkillspectorState) -> dict[str, object]: # additionally de-duplicates so the same issue is not counted twice. findings_for_scoring = deduplicate(active_findings) risk_score, risk_severity, risk_recommendation = _compute_risk_score( - findings_for_scoring, has_executable_scripts + findings_for_scoring, has_executable_scripts, component_metadata ) sarif_report = _build_sarif(active_findings, suppressed) analysis_completeness = _build_analysis_completeness( @@ -631,4 +643,4 @@ def report(state: SkillspectorState) -> dict[str, object]: "filtered_findings": filtered_findings, "suppressed_findings": suppressed, } - return out + return out \ No newline at end of file diff --git a/tests/nodes/test_report.py b/tests/nodes/test_report.py index 75cd561..2f1b895 100644 --- a/tests/nodes/test_report.py +++ b/tests/nodes/test_report.py @@ -146,8 +146,9 @@ class TestComputeRiskScoreExecutableMultiplier: """Tests for the executable scripts multiplier.""" def test_executable_multiplier_applies(self) -> None: - findings = [_finding("R1", "HIGH", confidence=1.0)] - score, _, _ = _compute_risk_score(findings, True) + findings = [_finding("R1", "HIGH", confidence=1.0, file="run.py")] + component_metadata = [{"path": "run.py", "executable": True}] + score, _, _ = _compute_risk_score(findings, True, component_metadata) # 25 * 1.3 = 32.5 -> 32 assert score == 32 @@ -402,8 +403,8 @@ def test_report_executable_scripts_multiplier(self) -> None: """has_executable_scripts applies 1.3x to risk score.""" state: SkillspectorState = { "filtered_findings": [ - _finding("E2", "HIGH", confidence=1.0), - _finding("PE3", "HIGH", confidence=1.0), + _finding("E2", "HIGH", confidence=1.0, file="run.py"), + _finding("PE3", "HIGH", confidence=1.0, file="run.py"), ], "component_metadata": [ { @@ -641,3 +642,47 @@ def test_report_no_baseline_unchanged() -> None: result = report(state) assert result["risk_score"] == 50 assert result["suppressed_findings"] == [] + +def test_report_executable_scripts_multiplier() -> None: + """1.3x multiplier applied only to findings from executable files.""" + # 2 HIGH findings in run.py = 2 × 25 × 1.3 = 65 (float-based accumulation) + state: SkillspectorState = { + "filtered_findings": [ + _finding("E2", "HIGH", file="run.py"), + _finding("PE3", "HIGH", file="run.py"), + ], + "component_metadata": [ + {"path": "run.py", "type": "python", "lines": 5, "executable": True, "size_bytes": 200} + ], + "has_executable_scripts": True, + "manifest": {}, + "skill_path": "/tmp/skill", + "output_format": "json", + } + result = report(state) + assert result["risk_score"] == 65 + assert result["risk_severity"] == "HIGH" + assert result["risk_recommendation"] == "DO_NOT_INSTALL" + + +def test_report_doc_findings_no_multiplier() -> None: + """Findings from non-executable files (markdown/docs) are not multiplied.""" + # 2 HIGH in SKILL.md (non-executable) = 2 × 25 = 50 (no 1.3x) + state: SkillspectorState = { + "filtered_findings": [ + _finding("P1", "HIGH", file="SKILL.md"), + _finding("P2", "HIGH", file="SKILL.md"), + ], + "component_metadata": [ + {"path": "SKILL.md", "type": "markdown", "lines": 10, "executable": False, "size_bytes": 500}, + {"path": "run.py", "type": "python", "lines": 5, "executable": True, "size_bytes": 200} + ], + "has_executable_scripts": True, + "manifest": {}, + "skill_path": "/tmp/skill", + "output_format": "json", + } + result = report(state) + # Without the multiplier: 2 HIGH = 50, not 65 + assert result["risk_score"] == 50 + assert result["risk_severity"] == "MEDIUM"