From 439a0df2944049b0e600a1f18d3ae355bb5cda45 Mon Sep 17 00:00:00 2001 From: Daniel Barranquero Date: Wed, 24 Jun 2026 17:51:07 +0200 Subject: [PATCH 01/20] feat(sdk): replace detect-secrets with kingfisher for secret scanning --- prowler/lib/utils/utils.py | 247 +++++++++++++++++++++++++------------ pyproject.toml | 2 +- uv.lock | 30 ++--- 3 files changed, 181 insertions(+), 98 deletions(-) diff --git a/prowler/lib/utils/utils.py b/prowler/lib/utils/utils.py index c4b29f6cc10..3f4159a61e7 100644 --- a/prowler/lib/utils/utils.py +++ b/prowler/lib/utils/utils.py @@ -9,10 +9,12 @@ pass import re +import subprocess import sys import tempfile from datetime import datetime -from hashlib import sha512 +from functools import lru_cache +from hashlib import sha1, sha512 from io import TextIOWrapper from ipaddress import ip_address from os.path import exists @@ -20,41 +22,28 @@ from typing import Any, Optional from colorama import Style -from detect_secrets import SecretsCollection -from detect_secrets.settings import transient_settings from prowler.config.config import encoding_format_utf_8 from prowler.lib.logger import logger -default_detect_secrets_plugins = [ - {"name": "ArtifactoryDetector"}, - {"name": "AWSKeyDetector"}, - {"name": "AzureStorageKeyDetector"}, - {"name": "BasicAuthDetector"}, - {"name": "CloudantDetector"}, - {"name": "DiscordBotTokenDetector"}, - {"name": "GitHubTokenDetector"}, - {"name": "GitLabTokenDetector"}, - {"name": "Base64HighEntropyString", "limit": 6.0}, - {"name": "HexHighEntropyString", "limit": 3.0}, - {"name": "IbmCloudIamDetector"}, - {"name": "IbmCosHmacDetector"}, - # {"name": "IPPublicDetector"}, https://github.com/Yelp/detect-secrets/pull/885 - {"name": "JwtTokenDetector"}, - {"name": "KeywordDetector"}, - {"name": "MailchimpDetector"}, - {"name": "NpmDetector"}, - {"name": "OpenAIDetector"}, - {"name": "PrivateKeyDetector"}, - {"name": "PypiTokenDetector"}, - {"name": "SendGridDetector"}, - {"name": "SlackDetector"}, - {"name": 
"SoftlayerDetector"}, - {"name": "SquareOAuthDetector"}, - {"name": "StripeDetector"}, - # {"name": "TelegramBotTokenDetector"}, https://github.com/Yelp/detect-secrets/pull/878 - {"name": "TwilioKeyDetector"}, -] +# Default minimum confidence level for reporting findings. "low" is required to +# enable Kingfisher's built-in generic rules (Generic Password / Secret / API +# Key), which preserve the keyword-based coverage Prowler had with +# detect-secrets' KeywordDetector; at "medium" those generic rules do not fire. +# Possible values: "low", "medium", "high". +default_secrets_confidence = "low" + +# Kingfisher exit codes considered successful: 0 (no findings), 200 (findings), +# 205 (validated findings). +_kingfisher_success_exit_codes = (0, 200, 205) + + +@lru_cache(maxsize=1) +def get_kingfisher_binary() -> str: + """Return the path to the bundled Kingfisher binary (cached).""" + from kingfisher import get_binary_path + + return get_binary_path() def open_file(input_file: str, mode: str = "r") -> TextIOWrapper: @@ -116,72 +105,166 @@ def detect_secrets_scan( file=None, excluded_secrets: list[str] = None, detect_secrets_plugins: dict = None, + confidence: str = default_secrets_confidence, + validate: bool = False, ) -> list[dict[str, str]]: - """detect_secrets_scan scans the data or file for secrets using the detect-secrets library. + """detect_secrets_scan scans the data or file for secrets using Kingfisher. + + By default the scan runs fully offline (`--no-validate`, `--no-update-check`): + no network calls are made, so the scanned data is never sent anywhere. + Kingfisher's built-in ruleset is used at "low" confidence so its generic + keyword rules fire (see ``default_secrets_confidence``). + + When ``validate`` is True, Kingfisher additionally checks whether each + discovered secret is live by authenticating with it against the provider's + API (the secret itself is used as the credential; no extra permissions are + required). 
This makes outbound network calls and the discovered credential + is exercised against the provider, so it must be explicitly opted in. + Args: data (str): The data to scan for secrets. file (str): The file to scan for secrets. - excluded_secrets (list): A list of regex patterns to exclude from the scan. - detect_secrets_plugins (dict): The settings to use for the scan. + excluded_secrets (list): A list of regex patterns; any finding whose + source line matches one of them is excluded from the results. + detect_secrets_plugins (dict): Deprecated. Kept for backwards + compatibility with existing call sites; ignored by Kingfisher. + confidence (str): Minimum Kingfisher confidence to report ("low", + "medium" or "high"). Defaults to ``default_secrets_confidence``. + validate (bool): When True, validate discovered secrets against the + provider APIs (live check). Makes outbound network calls. Defaults + to False (fully offline). Returns: - dict: The secrets found in the - Raises: - Exception: If an error occurs during the scan. + list[dict] | None: A list of findings, each with ``filename``, + ``line_number``, ``type``, ``hashed_secret`` and ``is_verified`` + keys, or ``None`` when no secrets are found or an error occurs. + ``is_verified`` is True only when ``validate`` is True and the + secret was confirmed live. 
Examples: - >>> detect_secrets_scan(data="password=password") - [{'filename': 'data', 'hashed_secret': 'f7c3bc1d808e04732adf679965ccc34ca7ae3441', 'is_verified': False, 'line_number': 1, 'type': 'Secret Keyword'}] - >>> detect_secrets_scan(file="file.txt") - {'file.txt': [{'filename': 'file.txt', 'hashed_secret': 'f7c3bc1d808e04732adf679965ccc34ca7ae3441', 'is_verified': False, 'line_number': 1, 'type': 'Secret Keyword'}]} + >>> detect_secrets_scan(data='password = "Tr0ub4dor&3xKq9vLmZ"') + [{'filename': '/tmp/...', 'line_number': 1, 'type': 'Generic Password', 'hashed_secret': '...', 'is_verified': False}] """ + if detect_secrets_plugins is not None: + logger.debug( + "detect_secrets_plugins is deprecated and ignored when scanning with Kingfisher." + ) + + temp_data_file = None + temp_output_file = None try: - if not file: - temp_data_file = tempfile.NamedTemporaryFile(delete=False) - temp_data_file.write(bytes(data, encoding="raw_unicode_escape")) + if file: + scan_path = file + else: + # Ensure a trailing newline: Kingfisher does not scan the final line + # of a file when it is not newline-terminated, and serialized payloads + # (JSON dumps, joined log events, state-machine definitions) often are + # not. Appending "\n" does not change line numbers or secret content. + content = data if data.endswith("\n") else data + "\n" + temp_data_file = tempfile.NamedTemporaryFile(delete=False, suffix=".txt") + temp_data_file.write(bytes(content, encoding="raw_unicode_escape")) temp_data_file.close() + scan_path = temp_data_file.name + + temp_output_file = tempfile.NamedTemporaryFile(delete=False, suffix=".json") + temp_output_file.close() + + command = [ + get_kingfisher_binary(), + "scan", + scan_path, + "--format", + "json", + "--output", + temp_output_file.name, + "--no-update-check", + "--confidence", + confidence, + ] + if validate: + # Live-validate discovered secrets against provider APIs. 
Use + # conservative defaults (short timeout, no retries) to limit the + # blast radius of the outbound calls. + command += [ + "--validation-timeout", + "5", + "--validation-retries", + "0", + ] + else: + command.append("--no-validate") + process = subprocess.run(command, capture_output=True, text=True) + if process.returncode not in _kingfisher_success_exit_codes: + logger.error( + f"Error scanning for secrets: Kingfisher exited with code " + f"{process.returncode}: {process.stderr.strip()[:500]}" + ) + return None - secrets = SecretsCollection() - - if not detect_secrets_plugins: - detect_secrets_plugins = default_detect_secrets_plugins - - settings = { - "plugins_used": detect_secrets_plugins, - "filters_used": [ - {"path": "detect_secrets.filters.common.is_invalid_file"}, - {"path": "detect_secrets.filters.common.is_known_false_positive"}, - {"path": "detect_secrets.filters.heuristic.is_likely_id_string"}, - {"path": "detect_secrets.filters.heuristic.is_potential_secret"}, - ], - } - - if excluded_secrets and len(excluded_secrets) > 0: - settings["filters_used"].append( + with open(temp_output_file.name, encoding=encoding_format_utf_8) as f: + output = f.read() + kingfisher_output = json.loads(output) if output.strip() else {} + + # Read source lines once to apply excluded_secrets against the full line + # (preserving detect-secrets' should_exclude_line semantics). 
+ source_lines = [] + if excluded_secrets: + with open(scan_path, encoding=encoding_format_utf_8, errors="replace") as f: + source_lines = f.read().splitlines() + + findings = [] + for entry in kingfisher_output.get("findings", []): + rule = entry.get("rule", {}) + finding = entry.get("finding", {}) + line_number = finding.get("line") + + if excluded_secrets and line_number and line_number <= len(source_lines): + line_text = source_lines[line_number - 1] + if any(re.search(pattern, line_text) for pattern in excluded_secrets): + continue + + snippet = finding.get("snippet", "") or "" + findings.append( { - "path": "detect_secrets.filters.regex.should_exclude_line", - "pattern": excluded_secrets, + "filename": finding.get("path", scan_path), + "line_number": line_number, + "type": rule.get("name"), + # Non-security identifier for the matched secret (matches + # the detect-secrets output shape); not used for security. + "hashed_secret": ( + sha1(snippet.encode(), usedforsecurity=False).hexdigest() + if snippet + else None + ), + "is_verified": finding.get("validation", {}).get("status") + == "Active", } ) - with transient_settings(settings): - if file: - secrets.scan_file(file) - else: - secrets.scan_file(temp_data_file.name) - - if not file: - os.remove(temp_data_file.name) - - detect_secrets_output = secrets.json() - - if detect_secrets_output: - if file: - return detect_secrets_output[file] - else: - return detect_secrets_output[temp_data_file.name] - else: - return None + + return findings or None except Exception as e: logger.error(f"Error scanning for secrets: {e}") return None + finally: + for temp_file in (temp_data_file, temp_output_file): + if temp_file and os.path.exists(temp_file.name): + os.remove(temp_file.name) + + +def annotate_verified_secrets(report, secrets: list) -> None: + """Escalate and annotate a finding when any of its secrets is confirmed live. 
+ + When secret validation (``--scan-secrets-validate`` / ``secrets_validate``) + confirms that a discovered secret is live, the finding is more severe than a + potential secret: its severity is raised to critical and a note is appended + to ``status_extended``. No-op when no secret was validated as live, so the + default offline behavior (and existing finding messages) is unchanged. + """ + if secrets and any(secret.get("is_verified") for secret in secrets): + from prowler.lib.check.models import Severity + + report.check_metadata.Severity = Severity.critical + report.status_extended += ( + " One or more of these secrets were confirmed to be live." + ) def validate_ip_address(ip_string): diff --git a/pyproject.toml b/pyproject.toml index 37bb1b003a7..a645bd0d102 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,11 +72,11 @@ dependencies = [ "dash==3.1.1", "dash-bootstrap-components==2.0.3", "defusedxml==0.7.1", - "detect-secrets==1.5.0", "dulwich==1.2.5", "google-api-python-client==2.163.0", "google-auth-httplib2==0.2.0", "jsonschema==4.23.0", + "kingfisher-bin==1.104.0", "kubernetes==32.0.1", "linode-api4==5.45.0", "markdown==3.10.2", diff --git a/uv.lock b/uv.lock index efd7008d204..3365c13f177 100644 --- a/uv.lock +++ b/uv.lock @@ -1795,19 +1795,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/84/d0/205d54408c08b13550c733c4b85429e7ead111c7f0014309637425520a9a/deprecated-1.3.1-py2.py3-none-any.whl", hash = "sha256:597bfef186b6f60181535a29fbe44865ce137a5079f295b479886c82729d5f3f", size = 11298, upload-time = "2025-10-30T08:19:00.758Z" }, ] -[[package]] -name = "detect-secrets" -version = "1.5.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pyyaml" }, - { name = "requests" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/69/67/382a863fff94eae5a0cf05542179169a1c49a4c8784a9480621e2066ca7d/detect_secrets-1.5.0.tar.gz", hash = 
"sha256:6bb46dcc553c10df51475641bb30fd69d25645cc12339e46c824c1e0c388898a", size = 97351, upload-time = "2024-05-06T17:46:19.721Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4e/5e/4f5fe4b89fde1dc3ed0eb51bd4ce4c0bca406246673d370ea2ad0c58d747/detect_secrets-1.5.0-py3-none-any.whl", hash = "sha256:e24e7b9b5a35048c313e983f76c4bd09dad89f045ff059e354f9943bf45aa060", size = 120341, upload-time = "2024-05-06T17:46:16.628Z" }, -] - [[package]] name = "dill" version = "0.4.1" @@ -2469,6 +2456,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/72/eb/698dc17e4beb315f83a47d47be128b8a63303dc8b7e7c31110410e10a68b/keystoneauth1-5.14.0-py3-none-any.whl", hash = "sha256:f8c503a95fdd83b5b72736657e4ffbb53d4b28b01763f23013f0294ed8a0e4b9", size = 343268, upload-time = "2026-05-13T09:09:24.573Z" }, ] +[[package]] +name = "kingfisher-bin" +version = "1.104.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/2b/324212f1baf482a7d4b66a2edf33073336735b67bb6b04a38d18fd9e67fb/kingfisher_bin-1.104.0-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:8e3840e67004a971fef80aba240ee5c3c5f7a3a343a6d1083a2751aaf866d5d3", size = 14057606, upload-time = "2026-06-22T03:03:01.419Z" }, + { url = "https://files.pythonhosted.org/packages/21/0a/cbf964da5102657cb9be4a59db7c9f7807ef88f9419673b7486daba785d3/kingfisher_bin-1.104.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:b838313411fa2166a318a45aec2cfcc238e2f30f5292e309ca1129a73180c851", size = 12468386, upload-time = "2026-06-22T03:03:03.951Z" }, + { url = "https://files.pythonhosted.org/packages/0b/a0/cc7ef0ac28f147cdfc9d80e4239fff11c1329831c6f57510c929e848753c/kingfisher_bin-1.104.0-py3-none-manylinux_2_17_aarch64.musllinux_1_2_aarch64.whl", hash = "sha256:0a94abbf2154ef8a3b4845cc0240e2321cdc19e0f5c7f585ea5252e76b242f68", size = 13943188, upload-time = "2026-06-22T03:03:06.378Z" }, + { url = 
"https://files.pythonhosted.org/packages/17/79/827cfd7787885798a00b5ab905bdc866ef6f8deeff0f708679b06bc9baaa/kingfisher_bin-1.104.0-py3-none-manylinux_2_17_x86_64.musllinux_1_2_x86_64.whl", hash = "sha256:f381274b946f7f68ed72911770fff72024f2192c6e2e2158f2a7fbfda8c482fb", size = 14757594, upload-time = "2026-06-22T03:03:08.66Z" }, + { url = "https://files.pythonhosted.org/packages/da/93/b0061fc69cd10382f647f9266823f213fd0b3f168f8b5bd9151a2370abb1/kingfisher_bin-1.104.0-py3-none-win_amd64.whl", hash = "sha256:f228d0dd61a738673b1c536e965a5661a83b1ee6ca64186a46ba6ea81ab4fd0b", size = 27697957, upload-time = "2026-06-22T03:03:11.268Z" }, + { url = "https://files.pythonhosted.org/packages/a5/fb/f062665b4eb3f77e799cb6335e56bc2945aea83787888a6c1ab329858d0a/kingfisher_bin-1.104.0-py3-none-win_arm64.whl", hash = "sha256:a7774d9d11815ca946bd80b8c9df0f1d39c36cb5a21def3323b99d148dc63065", size = 26063704, upload-time = "2026-06-22T03:03:14.08Z" }, +] + [[package]] name = "kubernetes" version = "32.0.1" @@ -3605,12 +3605,12 @@ dependencies = [ { name = "dash" }, { name = "dash-bootstrap-components" }, { name = "defusedxml" }, - { name = "detect-secrets" }, { name = "dulwich" }, { name = "google-api-python-client" }, { name = "google-auth-httplib2" }, { name = "h2" }, { name = "jsonschema" }, + { name = "kingfisher-bin" }, { name = "kubernetes" }, { name = "linode-api4" }, { name = "markdown" }, @@ -3714,12 +3714,12 @@ requires-dist = [ { name = "dash", specifier = "==3.1.1" }, { name = "dash-bootstrap-components", specifier = "==2.0.3" }, { name = "defusedxml", specifier = "==0.7.1" }, - { name = "detect-secrets", specifier = "==1.5.0" }, { name = "dulwich", specifier = "==1.2.5" }, { name = "google-api-python-client", specifier = "==2.163.0" }, { name = "google-auth-httplib2", specifier = "==0.2.0" }, { name = "h2", specifier = "==4.3.0" }, { name = "jsonschema", specifier = "==4.23.0" }, + { name = "kingfisher-bin", specifier = "==1.104.0" }, { name = "kubernetes", specifier = 
"==32.0.1" }, { name = "linode-api4", specifier = "==5.45.0" }, { name = "markdown", specifier = "==3.10.2" }, From 018c58d38869103a1463c552cde90862146781f9 Mon Sep 17 00:00:00 2001 From: Daniel Barranquero Date: Wed, 24 Jun 2026 17:51:12 +0200 Subject: [PATCH 02/20] feat(sdk): add opt-in secret validation with critical escalation for verified secrets --- prowler/config/config.yaml | 7 +++++++ prowler/lib/cli/parser.py | 12 ++++++++++++ ...oscaling_find_secrets_ec2_launch_configuration.py | 6 +++++- .../awslambda_function_no_secrets_in_code.py | 8 +++++++- .../awslambda_function_no_secrets_in_variables.py | 6 +++++- .../cloudformation_stack_outputs_find_secrets.py | 6 +++++- .../cloudwatch_log_group_no_secrets_in_logs.py | 11 ++++++++++- .../codebuild_project_no_secrets_in_variables.py | 8 +++++++- .../ec2_instance_secrets_user_data.py | 4 +++- .../ec2_launch_template_no_secrets.py | 6 +++++- .../ecs_task_definitions_no_environment_secrets.py | 6 +++++- .../glue_etl_jobs_no_secrets_in_arguments.py | 8 +++++++- .../ssm/ssm_document_secrets/ssm_document_secrets.py | 4 +++- ...unctions_statemachine_no_secrets_in_definition.py | 6 +++++- prowler/providers/common/models.py | 10 ++++++++++ .../blockstorage_snapshot_metadata_sensitive_data.py | 6 +++++- .../blockstorage_volume_metadata_sensitive_data.py | 6 +++++- .../compute_instance_metadata_sensitive_data.py | 4 +++- ...bjectstorage_container_metadata_sensitive_data.py | 6 +++++- 19 files changed, 114 insertions(+), 16 deletions(-) diff --git a/prowler/config/config.yaml b/prowler/config/config.yaml index 9d41a0e5928..78309786ef2 100644 --- a/prowler/config/config.yaml +++ b/prowler/config/config.yaml @@ -423,6 +423,13 @@ aws: # Patterns to ignore in the secrets checks secrets_ignore_patterns: [] + # Validate discovered secrets by checking whether they are live against the + # provider APIs. 
WARNING: this makes outbound network calls that authenticate + # with the discovered secret itself; the credential is exercised against the + # provider and the call will appear in the audited account's logs (and may + # trigger its monitoring). Disabled by default (scans stay fully offline). + secrets_validate: False + # AWS Secrets Manager Configuration # aws.secretsmanager_secret_unused # Maximum number of days a secret can be unused diff --git a/prowler/lib/cli/parser.py b/prowler/lib/cli/parser.py index b65a702fbca..67a7ea28246 100644 --- a/prowler/lib/cli/parser.py +++ b/prowler/lib/cli/parser.py @@ -473,6 +473,18 @@ def __init_config_parser__(self): default=default_fixer_config_file_path, help="Set configuration fixer file path", ) + config_parser.add_argument( + "--scan-secrets-validate", + action="store_true", + default=False, + help=( + "Validate secrets discovered by the secrets checks by checking " + "whether they are live against the provider APIs. WARNING: this " + "makes outbound network calls using the discovered secret itself; " + "the credential is exercised against the provider and the call " + "appears in the audited account's logs. Disabled by default." 
+ ), + ) def __init_custom_checks_metadata_parser__(self): # CustomChecksMetadata diff --git a/prowler/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration.py b/prowler/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration.py index 6595b710852..209e54ab078 100644 --- a/prowler/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration.py +++ b/prowler/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration.py @@ -4,7 +4,7 @@ from prowler.config.config import encoding_format_utf_8 from prowler.lib.check.models import Check, Check_Report_AWS from prowler.lib.logger import logger -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan from prowler.providers.aws.services.autoscaling.autoscaling_client import ( autoscaling_client, ) @@ -48,11 +48,15 @@ def execute(self): detect_secrets_plugins=autoscaling_client.audit_config.get( "detect_secrets_plugins" ), + validate=autoscaling_client.audit_config.get( + "secrets_validate", False + ), ) if has_secrets: report.status = "FAIL" report.status_extended = f"Potential secret found in autoscaling {configuration.name} User Data." + annotate_verified_secrets(report, has_secrets) else: report.status = "PASS" report.status_extended = f"No secrets found in autoscaling {configuration.name} User Data." 
diff --git a/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_code/awslambda_function_no_secrets_in_code.py b/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_code/awslambda_function_no_secrets_in_code.py index 51c56a20112..b4350a37d9a 100644 --- a/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_code/awslambda_function_no_secrets_in_code.py +++ b/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_code/awslambda_function_no_secrets_in_code.py @@ -2,7 +2,7 @@ import tempfile from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan from prowler.providers.aws.services.awslambda.awslambda_client import awslambda_client @@ -28,6 +28,7 @@ def execute(self): # List all files files_in_zip = next(os.walk(tmp_dir_name))[2] secrets_findings = [] + all_secrets = [] for file in files_in_zip: detect_secrets_output = detect_secrets_scan( file=f"{tmp_dir_name}/{file}", @@ -35,8 +36,12 @@ def execute(self): detect_secrets_plugins=awslambda_client.audit_config.get( "detect_secrets_plugins", ), + validate=awslambda_client.audit_config.get( + "secrets_validate", False + ), ) if detect_secrets_output: + all_secrets.extend(detect_secrets_output) for ( secret ) in ( @@ -59,6 +64,7 @@ def execute(self): final_output_string = "; ".join(secrets_findings) report.status = "FAIL" report.status_extended = f"Potential {'secrets' if len(secrets_findings) > 1 else 'secret'} found in Lambda function {function.name} code -> {final_output_string}." 
+ annotate_verified_secrets(report, all_secrets) findings.append(report) diff --git a/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables.py b/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables.py index 9448b0239f8..2a3ec0e6f63 100644 --- a/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables.py +++ b/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables.py @@ -1,7 +1,7 @@ import json from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan from prowler.providers.aws.services.awslambda.awslambda_client import awslambda_client @@ -26,6 +26,9 @@ def execute(self): detect_secrets_plugins=awslambda_client.audit_config.get( "detect_secrets_plugins", ), + validate=awslambda_client.audit_config.get( + "secrets_validate", False + ), ) original_env_vars = [] for name, value in function.environment.items(): @@ -39,6 +42,7 @@ def execute(self): ) report.status = "FAIL" report.status_extended = f"Potential secret found in Lambda function {function.name} variables -> {secrets_string}." 
+ annotate_verified_secrets(report, detect_secrets_output) findings.append(report) diff --git a/prowler/providers/aws/services/cloudformation/cloudformation_stack_outputs_find_secrets/cloudformation_stack_outputs_find_secrets.py b/prowler/providers/aws/services/cloudformation/cloudformation_stack_outputs_find_secrets/cloudformation_stack_outputs_find_secrets.py index f9b47932bbe..89919589139 100644 --- a/prowler/providers/aws/services/cloudformation/cloudformation_stack_outputs_find_secrets/cloudformation_stack_outputs_find_secrets.py +++ b/prowler/providers/aws/services/cloudformation/cloudformation_stack_outputs_find_secrets/cloudformation_stack_outputs_find_secrets.py @@ -1,5 +1,5 @@ from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan from prowler.providers.aws.services.cloudformation.cloudformation_client import ( cloudformation_client, ) @@ -32,6 +32,9 @@ def execute(self): detect_secrets_plugins=cloudformation_client.audit_config.get( "detect_secrets_plugins", ), + validate=cloudformation_client.audit_config.get( + "secrets_validate", False + ), ) # If secrets are found, update the report status if detect_secrets_output: @@ -43,6 +46,7 @@ def execute(self): ) report.status = "FAIL" report.status_extended = f"Potential secret found in CloudFormation Stack {stack.name} Outputs -> {secrets_string}." 
+ annotate_verified_secrets(report, detect_secrets_output) else: report.status = "PASS" diff --git a/prowler/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs.py b/prowler/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs.py index 5154a5acb75..de1a2f9946b 100644 --- a/prowler/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs.py +++ b/prowler/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs.py @@ -1,7 +1,7 @@ from json import dumps, loads from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan from prowler.providers.aws.services.cloudwatch.cloudwatch_service import ( convert_to_cloudwatch_timestamp_format, ) @@ -22,6 +22,7 @@ def execute(self): f"No secrets found in {log_group.name} log group." 
) log_group_secrets = [] + all_secrets = [] if log_group.log_streams: for log_stream_name in log_group.log_streams: log_stream_secrets = {} @@ -37,9 +38,13 @@ def execute(self): detect_secrets_plugins=logs_client.audit_config.get( "detect_secrets_plugins", ), + validate=logs_client.audit_config.get( + "secrets_validate", False + ), ) if log_stream_secrets_output: + all_secrets.extend(log_stream_secrets_output) for secret in log_stream_secrets_output: flagged_event = log_group.log_streams[log_stream_name][ secret["line_number"] - 1 @@ -73,6 +78,9 @@ def execute(self): detect_secrets_plugins=logs_client.audit_config.get( "detect_secrets_plugins" ), + validate=logs_client.audit_config.get( + "secrets_validate", False + ), ) if event_detect_secrets_output: for secret in event_detect_secrets_output: @@ -99,6 +107,7 @@ def execute(self): secrets_string = "; ".join(log_group_secrets) report.status = "FAIL" report.status_extended = f"Potential secrets found in log group {log_group.name} {secrets_string}." 
+ annotate_verified_secrets(report, all_secrets) findings.append(report) return findings diff --git a/prowler/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables.py b/prowler/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables.py index 8d031cc25ad..4689fa32a7f 100644 --- a/prowler/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables.py +++ b/prowler/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables.py @@ -1,7 +1,7 @@ import json from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan from prowler.providers.aws.services.codebuild.codebuild_client import codebuild_client @@ -19,6 +19,7 @@ def execute(self): report.status = "PASS" report.status_extended = f"CodeBuild project {project.name} does not have sensitive environment plaintext credentials." secrets_found = [] + all_secrets = [] if project.environment_variables: for env_var in project.environment_variables: @@ -32,8 +33,12 @@ def execute(self): detect_secrets_plugins=codebuild_client.audit_config.get( "detect_secrets_plugins", ), + validate=codebuild_client.audit_config.get( + "secrets_validate", False + ), ) if detect_secrets_output: + all_secrets.extend(detect_secrets_output) secrets_info = [ f"{secret['type']} in variable {env_var.name}" for secret in detect_secrets_output @@ -43,6 +48,7 @@ def execute(self): if secrets_found: report.status = "FAIL" report.status_extended = f"CodeBuild project {project.name} has sensitive environment plaintext credentials in variables: {', '.join(secrets_found)}." 
+ annotate_verified_secrets(report, all_secrets) findings.append(report) diff --git a/prowler/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data.py b/prowler/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data.py index 3c3864c4799..c991818841b 100644 --- a/prowler/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data.py +++ b/prowler/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data.py @@ -4,7 +4,7 @@ from prowler.config.config import encoding_format_utf_8 from prowler.lib.check.models import Check, Check_Report_AWS from prowler.lib.logger import logger -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan from prowler.providers.aws.services.ec2.ec2_client import ec2_client @@ -42,6 +42,7 @@ def execute(self): detect_secrets_plugins=ec2_client.audit_config.get( "detect_secrets_plugins" ), + validate=ec2_client.audit_config.get("secrets_validate", False), ) if detect_secrets_output: secrets_string = ", ".join( @@ -52,6 +53,7 @@ def execute(self): ) report.status = "FAIL" report.status_extended = f"Potential secret found in EC2 instance {instance.id} User Data -> {secrets_string}." 
+ annotate_verified_secrets(report, detect_secrets_output) else: report.status = "PASS" diff --git a/prowler/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets.py b/prowler/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets.py index 823553bcdf3..3f349720d99 100644 --- a/prowler/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets.py +++ b/prowler/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets.py @@ -4,7 +4,7 @@ from prowler.config.config import encoding_format_utf_8 from prowler.lib.check.models import Check, Check_Report_AWS from prowler.lib.logger import logger -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan from prowler.providers.aws.services.ec2.ec2_client import ec2_client @@ -18,6 +18,7 @@ def execute(self): report = Check_Report_AWS(metadata=self.metadata(), resource=template) versions_with_secrets = [] + all_secrets = [] for version in template.versions: if not version.template_data.user_data: @@ -48,9 +49,11 @@ def execute(self): detect_secrets_plugins=ec2_client.audit_config.get( "detect_secrets_plugins" ), + validate=ec2_client.audit_config.get("secrets_validate", False), ) if version_secrets: + all_secrets.extend(version_secrets) secrets_string = ", ".join( [ f"{secret['type']} on line {secret['line_number']}" @@ -64,6 +67,7 @@ def execute(self): if len(versions_with_secrets) > 0: report.status = "FAIL" report.status_extended = f"Potential secret found in User Data for EC2 Launch Template {template.name} in template versions: {', '.join(versions_with_secrets)}." + annotate_verified_secrets(report, all_secrets) else: report.status = "PASS" report.status_extended = f"No secrets found in User Data of any version for EC2 Launch Template {template.name}." 
diff --git a/prowler/providers/aws/services/ecs/ecs_task_definitions_no_environment_secrets/ecs_task_definitions_no_environment_secrets.py b/prowler/providers/aws/services/ecs/ecs_task_definitions_no_environment_secrets/ecs_task_definitions_no_environment_secrets.py index cd835d01498..2a1ca2f2dfd 100644 --- a/prowler/providers/aws/services/ecs/ecs_task_definitions_no_environment_secrets/ecs_task_definitions_no_environment_secrets.py +++ b/prowler/providers/aws/services/ecs/ecs_task_definitions_no_environment_secrets/ecs_task_definitions_no_environment_secrets.py @@ -1,7 +1,7 @@ from json import dumps from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan from prowler.providers.aws.services.ecs.ecs_client import ecs_client @@ -18,6 +18,7 @@ def execute(self): report.resource_id = f"{task_definition.name}:{task_definition.revision}" report.status = "PASS" extended_status_parts = [] + all_secrets = [] for container in task_definition.container_definitions: container_secrets_found = [] @@ -36,8 +37,10 @@ def execute(self): detect_secrets_plugins=ecs_client.audit_config.get( "detect_secrets_plugins", ), + validate=ecs_client.audit_config.get("secrets_validate", False), ) if detect_secrets_output: + all_secrets.extend(detect_secrets_output) secrets_string = ", ".join( [ f"{secret['type']} on the environment variable {original_env_vars[secret['line_number'] - 2]}" @@ -56,6 +59,7 @@ def execute(self): + "; ".join(extended_status_parts) + "." ) + annotate_verified_secrets(report, all_secrets) else: report.status_extended = f"No secrets found in variables of ECS task definition {task_definition.name} with revision {task_definition.revision}." 
findings.append(report) diff --git a/prowler/providers/aws/services/glue/glue_etl_jobs_no_secrets_in_arguments/glue_etl_jobs_no_secrets_in_arguments.py b/prowler/providers/aws/services/glue/glue_etl_jobs_no_secrets_in_arguments/glue_etl_jobs_no_secrets_in_arguments.py index 50c92f86193..a7848032843 100644 --- a/prowler/providers/aws/services/glue/glue_etl_jobs_no_secrets_in_arguments/glue_etl_jobs_no_secrets_in_arguments.py +++ b/prowler/providers/aws/services/glue/glue_etl_jobs_no_secrets_in_arguments/glue_etl_jobs_no_secrets_in_arguments.py @@ -1,7 +1,7 @@ import json from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan from prowler.providers.aws.services.glue.glue_client import glue_client @@ -27,6 +27,7 @@ def execute(self): if job.arguments: secrets_found = [] + all_secrets = [] for arg_name, arg_value in job.arguments.items(): detect_secrets_output = detect_secrets_scan( data=json.dumps({arg_name: arg_value}), @@ -34,8 +35,12 @@ def execute(self): detect_secrets_plugins=glue_client.audit_config.get( "detect_secrets_plugins", ), + validate=glue_client.audit_config.get( + "secrets_validate", False + ), ) if detect_secrets_output: + all_secrets.extend(detect_secrets_output) secrets_found.extend( [ f"{secret['type']} in argument {arg_name}" @@ -46,6 +51,7 @@ def execute(self): if secrets_found: report.status = "FAIL" report.status_extended = f"Potential secrets found in Glue job {job.name} default arguments: {', '.join(secrets_found)}." 
+ annotate_verified_secrets(report, all_secrets) findings.append(report) diff --git a/prowler/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets.py b/prowler/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets.py index 0ec8502babe..42a04e223f8 100644 --- a/prowler/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets.py +++ b/prowler/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets.py @@ -1,7 +1,7 @@ import json from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan from prowler.providers.aws.services.ssm.ssm_client import ssm_client @@ -25,6 +25,7 @@ def execute(self): detect_secrets_plugins=ssm_client.audit_config.get( "detect_secrets_plugins" ), + validate=ssm_client.audit_config.get("secrets_validate", False), ) if detect_secrets_output: secrets_string = ", ".join( @@ -35,6 +36,7 @@ def execute(self): ) report.status = "FAIL" report.status_extended = f"Potential secret found in SSM Document {document.name} -> {secrets_string}." 
+ annotate_verified_secrets(report, detect_secrets_output) findings.append(report) diff --git a/prowler/providers/aws/services/stepfunctions/stepfunctions_statemachine_no_secrets_in_definition/stepfunctions_statemachine_no_secrets_in_definition.py b/prowler/providers/aws/services/stepfunctions/stepfunctions_statemachine_no_secrets_in_definition/stepfunctions_statemachine_no_secrets_in_definition.py index db047100296..5c39215f748 100644 --- a/prowler/providers/aws/services/stepfunctions/stepfunctions_statemachine_no_secrets_in_definition/stepfunctions_statemachine_no_secrets_in_definition.py +++ b/prowler/providers/aws/services/stepfunctions/stepfunctions_statemachine_no_secrets_in_definition/stepfunctions_statemachine_no_secrets_in_definition.py @@ -1,5 +1,5 @@ from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan from prowler.providers.aws.services.stepfunctions.stepfunctions_client import ( stepfunctions_client, ) @@ -25,6 +25,9 @@ def execute(self) -> list[Check_Report_AWS]: detect_secrets_plugins=stepfunctions_client.audit_config.get( "detect_secrets_plugins", ), + validate=stepfunctions_client.audit_config.get( + "secrets_validate", False + ), ) if detect_secrets_output: @@ -40,6 +43,7 @@ def execute(self) -> list[Check_Report_AWS]: f"found in Step Functions state machine {state_machine.name} definition " f"-> {secrets_string}." 
) + annotate_verified_secrets(report, detect_secrets_output) findings.append(report) return findings diff --git a/prowler/providers/common/models.py b/prowler/providers/common/models.py index 120cc1a3747..84e71c48093 100644 --- a/prowler/providers/common/models.py +++ b/prowler/providers/common/models.py @@ -49,6 +49,16 @@ def __init__(self, arguments, bulk_checks_metadata): if updated_audit_config: provider._audit_config = updated_audit_config + # Secrets validation: --scan-secrets-validate opts into live validation + # of discovered secrets. Set the audit_config key directly so it applies + # even for providers whose default config does not declare it. + self.scan_secrets_validate = getattr(arguments, "scan_secrets_validate", False) + if self.scan_secrets_validate: + provider = Provider.get_global_provider() + audit_config = provider.audit_config or {} + audit_config["secrets_validate"] = True + provider._audit_config = audit_config + # Check output directory, if it is not created -> create it if self.output_directory and not self.fixer: if not isdir(self.output_directory): diff --git a/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.py b/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.py index 95de6288713..d072d0164c6 100644 --- a/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.py +++ b/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.py @@ -2,7 +2,7 @@ from typing import List from prowler.lib.check.models import Check, CheckReportOpenStack -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan from 
prowler.providers.openstack.services.blockstorage.blockstorage_client import ( blockstorage_client, ) @@ -38,6 +38,9 @@ def execute(self) -> List[CheckReportOpenStack]: detect_secrets_plugins=blockstorage_client.audit_config.get( "detect_secrets_plugins" ), + validate=blockstorage_client.audit_config.get( + "secrets_validate", False + ), ) if detect_secrets_output: @@ -54,6 +57,7 @@ def execute(self) -> List[CheckReportOpenStack]: ) report.status = "FAIL" report.status_extended = f"Snapshot {snapshot.name} ({snapshot.id}) metadata contains potential secrets -> {secrets_string}." + annotate_verified_secrets(report, detect_secrets_output) else: report.status_extended = f"Snapshot {snapshot.name} ({snapshot.id}) has no metadata (no sensitive data exposure risk)." diff --git a/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.py b/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.py index 1bfa84c3df4..7cc07fd1d98 100644 --- a/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.py +++ b/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.py @@ -2,7 +2,7 @@ from typing import List from prowler.lib.check.models import Check, CheckReportOpenStack -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan from prowler.providers.openstack.services.blockstorage.blockstorage_client import ( blockstorage_client, ) @@ -38,6 +38,9 @@ def execute(self) -> List[CheckReportOpenStack]: detect_secrets_plugins=blockstorage_client.audit_config.get( "detect_secrets_plugins" ), + validate=blockstorage_client.audit_config.get( + "secrets_validate", False + ), ) if 
detect_secrets_output: @@ -54,6 +57,7 @@ def execute(self) -> List[CheckReportOpenStack]: ) report.status = "FAIL" report.status_extended = f"Volume {volume.name} ({volume.id}) metadata contains potential secrets -> {secrets_string}." + annotate_verified_secrets(report, detect_secrets_output) else: report.status_extended = f"Volume {volume.name} ({volume.id}) has no metadata (no sensitive data exposure risk)." diff --git a/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.py b/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.py index 5df151c939f..6e0ec2b206b 100644 --- a/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.py +++ b/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.py @@ -2,7 +2,7 @@ from typing import List from prowler.lib.check.models import Check, CheckReportOpenStack -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan from prowler.providers.openstack.services.compute.compute_client import compute_client @@ -36,6 +36,7 @@ def execute(self) -> List[CheckReportOpenStack]: detect_secrets_plugins=compute_client.audit_config.get( "detect_secrets_plugins" ), + validate=compute_client.audit_config.get("secrets_validate", False), ) if detect_secrets_output: @@ -50,6 +51,7 @@ def execute(self) -> List[CheckReportOpenStack]: ) report.status = "FAIL" report.status_extended = f"Instance {instance.name} ({instance.id}) metadata contains potential secrets -> {secrets_string}." + annotate_verified_secrets(report, detect_secrets_output) else: report.status_extended = f"Instance {instance.name} ({instance.id}) has no metadata (no sensitive data exposure risk)." 
diff --git a/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.py b/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.py index 94d281bf328..a4410712709 100644 --- a/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.py +++ b/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.py @@ -2,7 +2,7 @@ from typing import List from prowler.lib.check.models import Check, CheckReportOpenStack -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan from prowler.providers.openstack.services.objectstorage.objectstorage_client import ( objectstorage_client, ) @@ -40,6 +40,9 @@ def execute(self) -> List[CheckReportOpenStack]: detect_secrets_plugins=objectstorage_client.audit_config.get( "detect_secrets_plugins" ), + validate=objectstorage_client.audit_config.get( + "secrets_validate", False + ), ) if detect_secrets_output: @@ -56,6 +59,7 @@ def execute(self) -> List[CheckReportOpenStack]: ) report.status = "FAIL" report.status_extended = f"Container {container.name} metadata contains potential secrets -> {secrets_string}." + annotate_verified_secrets(report, detect_secrets_output) else: report.status_extended = f"Container {container.name} has no metadata (no sensitive data exposure risk)." 
From 26004024fee97bb779ece0994189d17444fe81d7 Mon Sep 17 00:00:00 2001 From: Daniel Barranquero Date: Wed, 24 Jun 2026 17:51:17 +0200 Subject: [PATCH 03/20] test(sdk): update secret checks tests and fixtures for kingfisher --- tests/lib/utils/utils_test.py | 59 +++++++++++--- ...d_secrets_ec2_launch_configuration_test.py | 2 +- .../fixtures/fixture | 6 +- .../fixtures/fixture.gz | Bin 93 -> 135 bytes ...lambda_function_no_secrets_in_code_test.py | 4 +- ...a_function_no_secrets_in_variables_test.py | 32 ++++++-- ...rmation_stack_outputs_find_secrets_test.py | 7 +- ...watch_log_group_no_secrets_in_logs_test.py | 4 +- ...ld_project_no_secrets_in_variables_test.py | 59 +++++++++++--- .../ec2_instance_secrets_user_data_test.py | 61 ++++++++++++++- .../fixtures/fixture | 6 +- .../fixtures/fixture.gz | Bin 93 -> 206 bytes .../ec2_launch_template_no_secrets_test.py | 14 ++-- .../fixtures/fixture | 6 +- .../fixtures/fixture.gz | Bin 93 -> 206 bytes ...definitions_no_environment_secrets_test.py | 72 +++++++++++++++--- .../ssm_document_secrets_test.py | 4 +- ...temachine_no_secrets_in_definition_test.py | 2 +- ...e_snapshot_metadata_sensitive_data_test.py | 14 ++-- ...age_volume_metadata_sensitive_data_test.py | 14 ++-- ...e_instance_metadata_sensitive_data_test.py | 16 ++-- ..._container_metadata_sensitive_data_test.py | 4 +- 22 files changed, 296 insertions(+), 90 deletions(-) diff --git a/tests/lib/utils/utils_test.py b/tests/lib/utils/utils_test.py index c8db5a62d0a..5e093eaa975 100644 --- a/tests/lib/utils/utils_test.py +++ b/tests/lib/utils/utils_test.py @@ -4,7 +4,7 @@ from time import mktime import pytest -from mock import patch +from mock import Mock, patch from prowler.lib.utils.utils import ( detect_secrets_scan, @@ -110,7 +110,7 @@ def test_validate_ip_address(self): class Test_detect_secrets_scan: def test_detect_secrets_scan_data(self): - data = "password=password" + data = 'password = "Tr0ub4dor3xKq9vLmZ"' secrets_detected = detect_secrets_scan(data=data, 
excluded_secrets=[]) assert type(secrets_detected) is list assert len(secrets_detected) == 1 @@ -118,15 +118,15 @@ def test_detect_secrets_scan_data(self): assert "hashed_secret" in secrets_detected[0] assert "is_verified" in secrets_detected[0] assert secrets_detected[0]["line_number"] == 1 - assert secrets_detected[0]["type"] == "Secret Keyword" + assert secrets_detected[0]["type"] == "Generic Password" def test_detect_secrets_scan_no_secrets_data(self): data = "" assert detect_secrets_scan(data=data) is None def test_detect_secrets_scan_file_with_secrets(self): - temp_data_file = tempfile.NamedTemporaryFile(delete=False) - temp_data_file.write(b"password=password") + temp_data_file = tempfile.NamedTemporaryFile(delete=False, suffix=".txt") + temp_data_file.write(b'password = "Tr0ub4dor3xKq9vLmZ"\n') temp_data_file.seek(0) secrets_detected = detect_secrets_scan( file=temp_data_file.name, excluded_secrets=[] @@ -137,7 +137,7 @@ def test_detect_secrets_scan_file_with_secrets(self): assert "hashed_secret" in secrets_detected[0] assert "is_verified" in secrets_detected[0] assert secrets_detected[0]["line_number"] == 1 - assert secrets_detected[0]["type"] == "Secret Keyword" + assert secrets_detected[0]["type"] == "Generic Password" os.remove(temp_data_file.name) def test_detect_secrets_scan_file_no_secrets(self): @@ -165,18 +165,55 @@ def test_detect_secrets_using_regex_file(self): os.remove(temp_data_file.name) def test_detect_secrets_secrets_using_regex(self): - data = "MYSQL_ALLOW_EMPTY_PASSWORD=password, MYSQL_PASSWORD=password" - # Update the regex to exclude only the exact key "MYSQL_ALLOW_EMPTY_PASSWORD" + # Two secrets on separate lines; exclude the line with the + # ALLOW_EMPTY_PASSWORD key, leaving only the MYSQL_PASSWORD secret. 
+ data = ( + 'MYSQL_ALLOW_EMPTY_PASSWORD="Tr0ub4dor3xKq9vLmZ"\n' + 'MYSQL_PASSWORD="Xy9zPq2wKmRtVbN4Lm"' + ) secrets_detected = detect_secrets_scan( - data=data, excluded_secrets=["^MYSQL_ALLOW_EMPTY_PASSWORD$"] + data=data, excluded_secrets=[".*ALLOW_EMPTY_PASSWORD.*"] ) assert type(secrets_detected) is list assert len(secrets_detected) == 1 assert "filename" in secrets_detected[0] assert "hashed_secret" in secrets_detected[0] assert "is_verified" in secrets_detected[0] - assert secrets_detected[0]["line_number"] == 1 - assert secrets_detected[0]["type"] == "Secret Keyword" + assert secrets_detected[0]["line_number"] == 2 + assert secrets_detected[0]["type"] == "Generic Password" + + def test_detect_secrets_scan_offline_by_default(self): + # By default the scan is fully offline: --no-validate is passed and no + # validation flags are added. + with ( + patch( + "prowler.lib.utils.utils.get_kingfisher_binary", + return_value="kingfisher", + ), + patch("prowler.lib.utils.utils.subprocess.run") as mock_run, + ): + mock_run.return_value = Mock(returncode=0, stdout="", stderr="") + detect_secrets_scan(data="password = 'value'") + command = mock_run.call_args[0][0] + assert "--no-validate" in command + assert "--validation-timeout" not in command + + def test_detect_secrets_scan_validate_enabled(self): + # With validate=True, --no-validate is dropped and conservative + # validation flags are added. 
+ with ( + patch( + "prowler.lib.utils.utils.get_kingfisher_binary", + return_value="kingfisher", + ), + patch("prowler.lib.utils.utils.subprocess.run") as mock_run, + ): + mock_run.return_value = Mock(returncode=0, stdout="", stderr="") + detect_secrets_scan(data="password = 'value'", validate=True) + command = mock_run.call_args[0][0] + assert "--no-validate" not in command + assert "--validation-timeout" in command + assert "--validation-retries" in command class Test_hash_sha512: diff --git a/tests/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration_test.py b/tests/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration_test.py index 10057618349..053e40fe2a7 100644 --- a/tests/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration_test.py +++ b/tests/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration_test.py @@ -104,7 +104,7 @@ def test_one_autoscaling_with_secrets(self): InstanceType="t1.micro", KeyName="the_keys", SecurityGroups=["default", "default2"], - UserData="DB_PASSWORD=foobar123", + UserData='DB_PASSWORD="Tr0ub4dor3xKq9vLmZ"', ) launch_configuration_arn = autoscaling_client.describe_launch_configurations( LaunchConfigurationNames=[launch_configuration_name] diff --git a/tests/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/fixtures/fixture b/tests/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/fixtures/fixture index 2fb5138932d..c591954ab4b 100644 --- a/tests/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/fixtures/fixture +++ 
b/tests/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/fixtures/fixture @@ -1,4 +1,4 @@ -DB_PASSWORD=foobar123 +DB_PASSWORD="Tr0ub4dor3xKq9vLmZ" DB_USER=foo -API_KEY=12345abcd -SERVICE_PASSWORD=bbaabb45 +API_KEY=s3rv1c3Acc0untS3cr3tV4lu3x9 +SERVICE_PASSWORD="Xy9zPq2wKmRtVbN4" diff --git a/tests/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/fixtures/fixture.gz b/tests/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/fixtures/fixture.gz index 6120fcfbc43b6f852eee6457e370abb2dc7ba9a4..15e68af70e522f5a017d84bc3425e2bc6f23f39d 100644 GIT binary patch literal 135 zcmV;20C@i&iwFofS~+S0|7K};bairN0CRDQ4{!_)4)+gou~iBwGAKe~m$U%&ZJ0|5I%A`bun diff --git a/tests/providers/aws/services/awslambda/awslambda_function_no_secrets_in_code/awslambda_function_no_secrets_in_code_test.py b/tests/providers/aws/services/awslambda/awslambda_function_no_secrets_in_code/awslambda_function_no_secrets_in_code_test.py index 5f97082c1b5..79c4227bbac 100644 --- a/tests/providers/aws/services/awslambda/awslambda_function_no_secrets_in_code/awslambda_function_no_secrets_in_code_test.py +++ b/tests/providers/aws/services/awslambda/awslambda_function_no_secrets_in_code/awslambda_function_no_secrets_in_code_test.py @@ -19,7 +19,7 @@ LAMBDA_FUNCTION_ARN = f"arn:aws:lambda:{AWS_REGION_US_EAST_1}:{AWS_ACCOUNT_NUMBER}:function/{LAMBDA_FUNCTION_NAME}" LAMBDA_FUNCTION_CODE_WITH_SECRETS = """ def lambda_handler(event, context): - db_password = "test-password" + db_password = "Tr0ub4dor3xKq9vLmZ" print("custom log event") return event """ @@ -126,7 +126,7 @@ def test_function_code_with_secrets(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secret found in Lambda function {LAMBDA_FUNCTION_NAME} code -> lambda_function.py: Secret Keyword on line 3." 
+ == f"Potential secret found in Lambda function {LAMBDA_FUNCTION_NAME} code -> lambda_function.py: Generic Password on line 3." ) assert result[0].resource_tags == [] diff --git a/tests/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables_test.py b/tests/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables_test.py index 6ae517dfe2a..9eb49b7e578 100644 --- a/tests/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables_test.py +++ b/tests/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables_test.py @@ -97,7 +97,7 @@ def test_function_secrets_in_keyword(self): arn=function_arn, region=AWS_REGION_US_EAST_1, runtime=function_runtime, - environment={"db_password": "test-password"}, + environment={"db_password": "Tr0ub4dor3xKq9vLmZ"}, ) } @@ -126,7 +126,7 @@ def test_function_secrets_in_keyword(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secret found in Lambda function {function_name} variables -> Secret Keyword in variable db_password." + == f"Potential secret found in Lambda function {function_name} variables -> Generic Password in variable db_password." 
) assert result[0].resource_tags == [] @@ -145,7 +145,9 @@ def test_function_secrets_in_keyword_and_variable(self): arn=function_arn, region=AWS_REGION_US_EAST_1, runtime=function_runtime, - environment={"db_password": "srv://admin:pass@db"}, + environment={ + "db_password": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U" + }, ) } @@ -172,9 +174,18 @@ def test_function_secrets_in_keyword_and_variable(self): assert result[0].resource_id == function_name assert result[0].resource_arn == function_arn assert result[0].status == "FAIL" + # Kingfisher reports both the generic keyword rule and the JWT rule + # for the same value; their order is not guaranteed, so assert on + # presence rather than a fixed concatenation order. + assert result[0].status_extended.startswith( + f"Potential secret found in Lambda function {function_name} variables -> " + ) assert ( - result[0].status_extended - == f"Potential secret found in Lambda function {function_name} variables -> Secret Keyword in variable db_password, Basic Auth Credentials in variable db_password." + "Generic Password in variable db_password" in result[0].status_extended + ) + assert ( + "JSON Web Token (base64url-encoded) in variable db_password" + in result[0].status_extended ) assert result[0].resource_tags == [] @@ -191,7 +202,12 @@ def test_function_secrets_in_variables_telegram_token(self): arn=function_arn, region=AWS_REGION_US_EAST_1, runtime=function_runtime, - environment={"TELEGRAM_BOT_TOKEN": "telegram-token"}, + environment={ + # The Telegram bot-token rule is no longer enabled in + # Kingfisher's built-in ruleset, so a detectable JWT + # is used to keep this token-in-variable case meaningful. 
+ "TELEGRAM_BOT_TOKEN": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U" + }, ) } @@ -217,10 +233,10 @@ def test_function_secrets_in_variables_telegram_token(self): assert result[0].region == AWS_REGION_US_EAST_1 assert result[0].resource_id == function_name assert result[0].resource_arn == function_arn - assert result[0].status == "PASS" + assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"No secrets found in Lambda function {function_name} variables." + == f"Potential secret found in Lambda function {function_name} variables -> JSON Web Token (base64url-encoded) in variable TELEGRAM_BOT_TOKEN." ) assert result[0].resource_tags == [] diff --git a/tests/providers/aws/services/cloudformation/cloudformation_stack_outputs_find_secrets/cloudformation_stack_outputs_find_secrets_test.py b/tests/providers/aws/services/cloudformation/cloudformation_stack_outputs_find_secrets/cloudformation_stack_outputs_find_secrets_test.py index 6b8d0d9b15e..ad9d2777882 100644 --- a/tests/providers/aws/services/cloudformation/cloudformation_stack_outputs_find_secrets/cloudformation_stack_outputs_find_secrets_test.py +++ b/tests/providers/aws/services/cloudformation/cloudformation_stack_outputs_find_secrets/cloudformation_stack_outputs_find_secrets_test.py @@ -38,7 +38,10 @@ def test_stack_secret_in_outputs(self): Stack( arn="arn:aws:cloudformation:eu-west-1:123456789012:stack/Test-Stack/796c8d26-b390-41d7-a23c-0702c4e78b60", name=stack_name, - outputs=["DB_PASSWORD:foobar123", "ENV:DEV"], + outputs=[ + "DB_KEY:eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U", + "ENV:DEV", + ], region=AWS_REGION, ) ] @@ -66,7 +69,7 @@ def test_stack_secret_in_outputs(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secret found in CloudFormation Stack {stack_name} Outputs -> Secret Keyword in Output 1." 
+ == f"Potential secret found in CloudFormation Stack {stack_name} Outputs -> JSON Web Token (base64url-encoded) in Output 1." ) assert result[0].resource_id == "Test-Stack" assert ( diff --git a/tests/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs_test.py b/tests/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs_test.py index e9eb00175b5..69db1621dc0 100644 --- a/tests/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs_test.py +++ b/tests/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs_test.py @@ -132,7 +132,7 @@ def test_cloudwatch_log_group_with_secrets(self): logEvents=[ { "timestamp": timestamp, - "message": "password = password123", + "message": 'password = "Tr0ub4dor3xKq9vLmZ"', } ], ) @@ -174,7 +174,7 @@ def test_cloudwatch_log_group_with_secrets(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secrets found in log group test in log stream test stream at {dttimestamp} - Secret Keyword on line 1." + == f"Potential secrets found in log group test in log stream test stream at {dttimestamp} - Generic Password on line 1." 
) assert result[0].resource_id == "test" assert ( diff --git a/tests/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables_test.py b/tests/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables_test.py index c08d1c8bb34..957a1ceb63b 100644 --- a/tests/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables_test.py +++ b/tests/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables_test.py @@ -202,7 +202,11 @@ def test_project_with_sensitive_plaintext_credentials(self): environment_variables=[ { "name": "AWS_ACCESS_KEY_ID", - "value": "AKIAIOSFODNN7EXAMPLE", + # Realistic fake secret that Kingfisher detects. The classic + # "AKIAIOSFODNN7EXAMPLE" placeholder is suppressed by + # Kingfisher and its AWS Access Key rule is not enabled, so a + # detectable provider secret is used instead. + "value": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U", "type": "PLAINTEXT", } ], @@ -231,9 +235,18 @@ def test_project_with_sensitive_plaintext_credentials(self): assert len(result) == 1 assert result[0].status == "FAIL" + # The JWT paired with a "KEY" variable name yields both a + # JWT and a Generic API Key finding; order is non-deterministic. + assert result[0].status_extended.startswith( + "CodeBuild project SensitiveProject has sensitive environment plaintext credentials in variables:" + ) assert ( - result[0].status_extended - == "CodeBuild project SensitiveProject has sensitive environment plaintext credentials in variables: AWS Access Key in variable AWS_ACCESS_KEY_ID." 
+ "JSON Web Token (base64url-encoded) in variable AWS_ACCESS_KEY_ID" + in result[0].status_extended + ) + assert ( + "Generic API Key in variable AWS_ACCESS_KEY_ID" + in result[0].status_extended ) assert result[0].region == AWS_REGION_US_EAST_1 assert result[0].resource_id == "SensitiveProject" @@ -373,12 +386,12 @@ def test_project_with_sensitive_plaintext_credentials_excluded_and_failed(self): environment_variables=[ { "name": "AWS_DUMB_ACCESS_KEY", - "value": "AKIAIOSFODNN7EXAMPLE", + "value": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U", "type": "PLAINTEXT", }, { "name": "AWS_ACCESS_KEY_ID", - "value": "AKIAIOSFODNN7EXAMPLE", + "value": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U", "type": "PLAINTEXT", }, ], @@ -409,10 +422,21 @@ def test_project_with_sensitive_plaintext_credentials_excluded_and_failed(self): assert len(result) == 1 assert result[0].status == "FAIL" + # AWS_DUMB_ACCESS_KEY is excluded, so only AWS_ACCESS_KEY_ID is + # scanned; its JWT + "KEY" name yields both a JWT and a + # Generic API Key finding with non-deterministic order. + assert result[0].status_extended.startswith( + "CodeBuild project SensitiveProject has sensitive environment plaintext credentials in variables:" + ) assert ( - result[0].status_extended - == "CodeBuild project SensitiveProject has sensitive environment plaintext credentials in variables: AWS Access Key in variable AWS_ACCESS_KEY_ID." 
+ "JSON Web Token (base64url-encoded) in variable AWS_ACCESS_KEY_ID" + in result[0].status_extended + ) + assert ( + "Generic API Key in variable AWS_ACCESS_KEY_ID" + in result[0].status_extended ) + assert "AWS_DUMB_ACCESS_KEY" not in result[0].status_extended assert result[0].region == AWS_REGION_US_EAST_1 assert result[0].resource_id == "SensitiveProject" assert result[0].resource_arn == project_arn @@ -434,12 +458,12 @@ def test_project_with_multiple_sensitive_credentials(self): environment_variables=[ { "name": "AWS_DUMB_ACCESS_KEY", - "value": "AKIAIOSFODNN7EXAMPLE", + "value": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U", "type": "PLAINTEXT", }, { "name": "AWS_ACCESS_KEY_ID", - "value": "AKIAIOSFODNN7EXAMPLE", + "value": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U", "type": "PLAINTEXT", }, ], @@ -468,10 +492,21 @@ def test_project_with_multiple_sensitive_credentials(self): assert len(result) == 1 assert result[0].status == "FAIL" - assert ( - result[0].status_extended - == "CodeBuild project SensitiveProject has sensitive environment plaintext credentials in variables: AWS Access Key in variable AWS_DUMB_ACCESS_KEY, AWS Access Key in variable AWS_ACCESS_KEY_ID." + # Both variables hold a JWT and have "KEY" in their name, so + # each yields a JWT and a Generic API Key finding; order is + # non-deterministic. 
+ assert result[0].status_extended.startswith( + "CodeBuild project SensitiveProject has sensitive environment plaintext credentials in variables:" ) + for var_name in ("AWS_DUMB_ACCESS_KEY", "AWS_ACCESS_KEY_ID"): + assert ( + f"JSON Web Token (base64url-encoded) in variable {var_name}" + in result[0].status_extended + ) + assert ( + f"Generic API Key in variable {var_name}" + in result[0].status_extended + ) assert result[0].region == AWS_REGION_US_EAST_1 assert result[0].resource_id == "SensitiveProject" assert result[0].resource_arn == project_arn diff --git a/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data_test.py b/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data_test.py index 0e8d303fe05..02763e5ea3a 100644 --- a/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data_test.py +++ b/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data_test.py @@ -100,7 +100,7 @@ def test_one_ec2_with_secrets(self): ImageId=EXAMPLE_AMI_ID, MinCount=1, MaxCount=1, - UserData="DB_PASSWORD=foobar123", + UserData='DB_PASSWORD="Tr0ub4dor3xKq9vLmZ"', )[0] from prowler.providers.aws.services.ec2.ec2_service import EC2 @@ -130,7 +130,7 @@ def test_one_ec2_with_secrets(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secret found in EC2 instance {instance.id} User Data -> Secret Keyword on line 1." + == f"Potential secret found in EC2 instance {instance.id} User Data -> Generic Password on line 1." ) assert result[0].resource_id == instance.id assert ( @@ -233,7 +233,7 @@ def test_one_ec2_file_with_secrets(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secret found in EC2 instance {instance.id} User Data -> Secret Keyword on line 1, Hex High Entropy String on line 3, Secret Keyword on line 3, Secret Keyword on line 4." 
+ == f"Potential secret found in EC2 instance {instance.id} User Data -> Generic Password on line 1, JSON Web Token (base64url-encoded) on line 3, Generic Password on line 4." ) assert result[0].resource_id == instance.id assert ( @@ -327,7 +327,7 @@ def test_one_ec2_file_with_secrets_gzip(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secret found in EC2 instance {instance.id} User Data -> Secret Keyword on line 1, Hex High Entropy String on line 3, Secret Keyword on line 3, Secret Keyword on line 4." + == f"Potential secret found in EC2 instance {instance.id} User Data -> Generic Password on line 1, JSON Web Token (base64url-encoded) on line 3, Generic Password on line 4." ) assert result[0].resource_id == instance.id assert ( @@ -337,6 +337,59 @@ def test_one_ec2_file_with_secrets_gzip(self): assert result[0].resource_tags is None assert result[0].region == AWS_REGION_US_EAST_1 + @mock_aws + def test_one_ec2_with_verified_secret(self): + from prowler.lib.check.models import Severity + + ec2 = resource("ec2", region_name=AWS_REGION_US_EAST_1) + instance = ec2.create_instances( + ImageId=EXAMPLE_AMI_ID, + MinCount=1, + MaxCount=1, + UserData='STRIPE_KEY="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U"', + )[0] + + from prowler.providers.aws.services.ec2.ec2_service import EC2 + + aws_provider = set_mocked_aws_provider( + [AWS_REGION_EU_WEST_1, AWS_REGION_US_EAST_1] + ) + + with ( + mock.patch( + "prowler.providers.common.provider.Provider.get_global_provider", + return_value=aws_provider, + ), + mock.patch( + "prowler.providers.aws.services.ec2.ec2_instance_secrets_user_data.ec2_instance_secrets_user_data.ec2_client", + new=EC2(aws_provider), + ), + mock.patch( + "prowler.providers.aws.services.ec2.ec2_instance_secrets_user_data.ec2_instance_secrets_user_data.detect_secrets_scan", + return_value=[ + { + "type": "JSON Web Token (base64url-encoded)", + 
"line_number": 1, + "filename": "data", + "hashed_secret": "x", + "is_verified": True, + } + ], + ), + ): + from prowler.providers.aws.services.ec2.ec2_instance_secrets_user_data.ec2_instance_secrets_user_data import ( + ec2_instance_secrets_user_data, + ) + + check = ec2_instance_secrets_user_data() + result = check.execute() + + assert len(result) == 1 + assert result[0].status == "FAIL" + assert result[0].check_metadata.Severity == Severity.critical + assert "confirmed to be live" in result[0].status_extended + assert result[0].resource_id == instance.id + @mock_aws def test_one_secrets_with_unicode_error(self): invalid_utf8_bytes = b"\xc0\xaf" diff --git a/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/fixtures/fixture b/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/fixtures/fixture index 2fb5138932d..528ff40f8ff 100644 --- a/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/fixtures/fixture +++ b/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/fixtures/fixture @@ -1,4 +1,4 @@ -DB_PASSWORD=foobar123 +DB_PASSWORD="Tr0ub4dor3xKq9vLmZ" DB_USER=foo -API_KEY=12345abcd -SERVICE_PASSWORD=bbaabb45 +STRIPE_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U +SERVICE_PASSWORD="Xy9zPq2wKmRtVbN4" diff --git a/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/fixtures/fixture.gz b/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/fixtures/fixture.gz index 6120fcfbc43b6f852eee6457e370abb2dc7ba9a4..859b38cb62b5f701272d834460a3c1a450566541 100644 GIT binary patch literal 206 zcmV;<05Sg`iwFoE`#Wj?17>M>bairN08P%%5<);60N}gF@Bq!$s>#8jP<~{0XDz9V z8I%yBKjP_g?EW1-V;ixJ=N&0GG+A`$9V=|$ I)yn_?06v*vaR2}S literal 93 zcmb2|=HL)?KbXwGoR(QpQd*SCP+Zu>U%&ZJ0|5I%A`bun diff --git a/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets_test.py 
b/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets_test.py index 0430ca5caec..e0f66ad534c 100644 --- a/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets_test.py +++ b/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets_test.py @@ -29,7 +29,9 @@ def mock_make_api_call(self, operation_name, kwarg): "VersionNumber": 123, "LaunchTemplateData": { "UserData": b64encode( - "DB_PASSWORD=foobar123".encode(encoding_format_utf_8) + 'DB_PASSWORD="Tr0ub4dor3xKq9vLmZ"'.encode( + encoding_format_utf_8 + ) ).decode(encoding_format_utf_8), "NetworkInterfaces": [{"AssociatePublicIpAddress": True}], }, @@ -164,7 +166,7 @@ def test_one_launch_template_with_secrets(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == "Potential secret found in User Data for EC2 Launch Template tester1 in template versions: Version 123: Secret Keyword on line 1." + == "Potential secret found in User Data for EC2 Launch Template tester1 in template versions: Version 123: Generic Password on line 1." ) assert result[0].resource_id == "lt-1234567890" assert result[0].region == AWS_REGION_US_EAST_1 @@ -236,7 +238,7 @@ def test_one_launch_template_with_secrets_in_multiple_versions(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secret found in User Data for EC2 Launch Template {launch_template_name} in template versions: Version 1: Secret Keyword on line 1, Hex High Entropy String on line 3, Secret Keyword on line 3, Secret Keyword on line 4, Version 2: Secret Keyword on line 1, Hex High Entropy String on line 3, Secret Keyword on line 3, Secret Keyword on line 4." 
+ == f"Potential secret found in User Data for EC2 Launch Template {launch_template_name} in template versions: Version 1: Generic Password on line 1, JSON Web Token (base64url-encoded) on line 3, Generic Password on line 4, Version 2: Generic Password on line 1, JSON Web Token (base64url-encoded) on line 3, Generic Password on line 4." ) assert result[0].resource_id == launch_template_id assert result[0].region == AWS_REGION_US_EAST_1 @@ -314,7 +316,7 @@ def test_one_launch_template_with_secrets_in_single_version(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secret found in User Data for EC2 Launch Template {launch_template_name} in template versions: Version 1: Secret Keyword on line 1, Hex High Entropy String on line 3, Secret Keyword on line 3, Secret Keyword on line 4." + == f"Potential secret found in User Data for EC2 Launch Template {launch_template_name} in template versions: Version 1: Generic Password on line 1, JSON Web Token (base64url-encoded) on line 3, Generic Password on line 4." ) assert result[0].resource_id == launch_template_id assert result[0].region == AWS_REGION_US_EAST_1 @@ -382,7 +384,7 @@ def test_one_launch_template_with_secrets_gzip(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secret found in User Data for EC2 Launch Template {launch_template_name} in template versions: Version 1: Secret Keyword on line 1, Hex High Entropy String on line 3, Secret Keyword on line 3, Secret Keyword on line 4." + == f"Potential secret found in User Data for EC2 Launch Template {launch_template_name} in template versions: Version 1: Generic Password on line 1, JSON Web Token (base64url-encoded) on line 3, Generic Password on line 4." 
) assert result[0].resource_id == launch_template_id assert result[0].region == AWS_REGION_US_EAST_1 @@ -530,7 +532,7 @@ def test_two_launch_templates_one_template_with_secrets(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secret found in User Data for EC2 Launch Template {launch_template_name1} in template versions: Version 1: Secret Keyword on line 1, Hex High Entropy String on line 3, Secret Keyword on line 3, Secret Keyword on line 4." + == f"Potential secret found in User Data for EC2 Launch Template {launch_template_name1} in template versions: Version 1: Generic Password on line 1, JSON Web Token (base64url-encoded) on line 3, Generic Password on line 4." ) assert result[0].resource_id == launch_template_id1 assert result[0].region == AWS_REGION_US_EAST_1 diff --git a/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/fixtures/fixture b/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/fixtures/fixture index 2fb5138932d..528ff40f8ff 100644 --- a/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/fixtures/fixture +++ b/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/fixtures/fixture @@ -1,4 +1,4 @@ -DB_PASSWORD=foobar123 +DB_PASSWORD="Tr0ub4dor3xKq9vLmZ" DB_USER=foo -API_KEY=12345abcd -SERVICE_PASSWORD=bbaabb45 +STRIPE_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U +SERVICE_PASSWORD="Xy9zPq2wKmRtVbN4" diff --git a/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/fixtures/fixture.gz b/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/fixtures/fixture.gz index 6120fcfbc43b6f852eee6457e370abb2dc7ba9a4..859b38cb62b5f701272d834460a3c1a450566541 100644 GIT binary patch literal 206 zcmV;<05Sg`iwFoE`#Wj?17>M>bairN08P%%5<);60N}gF@Bq!$s>#8jP<~{0XDz9V z8I%yBKjP_g?EW1-V;ixJ=N&0GG+A`$9V=|$ I)yn_?06v*vaR2}S literal 93 zcmb2|=HL)?KbXwGoR(QpQd*SCP+Zu>U%&ZJ0|5I%A`bun diff --git 
a/tests/providers/aws/services/ecs/ecs_task_definitions_no_environment_secrets/ecs_task_definitions_no_environment_secrets_test.py b/tests/providers/aws/services/ecs/ecs_task_definitions_no_environment_secrets/ecs_task_definitions_no_environment_secrets_test.py index 24c27ccf0b1..753c00b0ef4 100644 --- a/tests/providers/aws/services/ecs/ecs_task_definitions_no_environment_secrets/ecs_task_definitions_no_environment_secrets_test.py +++ b/tests/providers/aws/services/ecs/ecs_task_definitions_no_environment_secrets/ecs_task_definitions_no_environment_secrets_test.py @@ -11,9 +11,17 @@ ENV_VAR_NAME_NO_SECRETS = "host" ENV_VAR_VALUE_NO_SECRETS = "localhost:1234" ENV_VAR_NAME_WITH_KEYWORD = "DB_PASSWORD" -ENV_VAR_VALUE_WITH_SECRETS = "srv://admin:pass@db" +# Realistic fake secrets that Kingfisher actually detects (placeholders such as +# the previous "srv://admin:pass@db" basic-auth URL are no longer flagged). +# A JWT fires on any line of the dumped JSON (even when followed by a +# trailing comma); a keyword-named variable additionally fires the generic +# keyword rule when it is the last entry in the dump. +ENV_VAR_VALUE_WITH_SECRETS = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U" ENV_VAR_NAME_WITH_KEYWORD2 = "DATABASE_PASSWORD" -ENV_VAR_VALUE_WITH_SECRETS2 = "srv://admin:password@database" +ENV_VAR_VALUE_WITH_SECRETS2 = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI5ODc2NTQzMjEwIiwibmFtZSI6IkphbmUifQ.s5LqY8mC2pX1vN0bQwReTyUiOpAsDfGhJkLzXcVbNm0" +# Generic password/secret assignment value (detected only on the last entry of +# the JSON dump, where there is no trailing comma after the value). 
+ENV_VAR_VALUE_GENERIC_SECRET = "Tr0ub4dor3xKq9vLmZ" class Test_ecs_task_definitions_no_environment_secrets: @@ -143,7 +151,7 @@ def test_container_env_var_with_secret(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secrets found in ECS task definition {TASK_NAME} with revision {TASK_REVISION}: Secrets in container test-container -> Basic Auth Credentials on the environment variable host." + == f"Potential secrets found in ECS task definition {TASK_NAME} with revision {TASK_REVISION}: Secrets in container test-container -> JSON Web Token (base64url-encoded) on the environment variable host." ) assert result[0].resource_id == f"{TASK_NAME}:{TASK_REVISION}" assert result[0].resource_arn == task_arn @@ -167,7 +175,7 @@ def test_container_env_var_with_keyword(self): "environment": [ { "name": ENV_VAR_NAME_WITH_KEYWORD, - "value": ENV_VAR_VALUE_NO_SECRETS, + "value": ENV_VAR_VALUE_GENERIC_SECRET, } ], } @@ -198,7 +206,7 @@ def test_container_env_var_with_keyword(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secrets found in ECS task definition {TASK_NAME} with revision {TASK_REVISION}: Secrets in container test-container -> Secret Keyword on the environment variable DB_PASSWORD." + == f"Potential secrets found in ECS task definition {TASK_NAME} with revision {TASK_REVISION}: Secrets in container test-container -> Generic Password on the environment variable DB_PASSWORD." ) assert result[0].resource_id == f"{TASK_NAME}:{TASK_REVISION}" assert result[0].resource_arn == task_arn @@ -251,9 +259,20 @@ def test_container_env_var_with_keyword_and_secret(self): result = check.execute() assert len(result) == 1 assert result[0].status == "FAIL" + # The keyword-named variable holding a real secret triggers both the + # generic keyword rule and the JWT rule on the same line. 
+ # Kingfisher emits same-line findings in a non-deterministic order, so + # assert both are present without pinning their order. + assert result[0].status_extended.startswith( + f"Potential secrets found in ECS task definition {TASK_NAME} with revision {TASK_REVISION}: Secrets in container test-container -> " + ) assert ( - result[0].status_extended - == f"Potential secrets found in ECS task definition {TASK_NAME} with revision {TASK_REVISION}: Secrets in container test-container -> Secret Keyword on the environment variable DB_PASSWORD, Basic Auth Credentials on the environment variable DB_PASSWORD." + "JSON Web Token (base64url-encoded) on the environment variable DB_PASSWORD" + in result[0].status_extended + ) + assert ( + "Generic Password on the environment variable DB_PASSWORD" + in result[0].status_extended ) assert result[0].resource_id == f"{TASK_NAME}:{TASK_REVISION}" assert result[0].resource_arn == task_arn @@ -310,9 +329,23 @@ def test_container_multiple_env_vars_with_keyword_and_secret(self): result = check.execute() assert len(result) == 1 assert result[0].status == "FAIL" + # DB_PASSWORD holds a JWT under a keyword name, so it fires + # both the JWT rule and the generic keyword rule on the + # same line (non-deterministic order); host holds a second JWT. + assert result[0].status_extended.startswith( + f"Potential secrets found in ECS task definition {TASK_NAME} with revision {TASK_REVISION}: Secrets in container test-container -> " + ) assert ( - result[0].status_extended - == f"Potential secrets found in ECS task definition {TASK_NAME} with revision {TASK_REVISION}: Secrets in container test-container -> Secret Keyword on the environment variable DB_PASSWORD, Basic Auth Credentials on the environment variable DB_PASSWORD, Basic Auth Credentials on the environment variable host." 
+ "JSON Web Token (base64url-encoded) on the environment variable DB_PASSWORD" + in result[0].status_extended + ) + assert ( + "Generic Password on the environment variable DB_PASSWORD" + in result[0].status_extended + ) + assert ( + "JSON Web Token (base64url-encoded) on the environment variable host" + in result[0].status_extended ) assert result[0].resource_id == f"{TASK_NAME}:{TASK_REVISION}" assert result[0].resource_arn == task_arn @@ -340,7 +373,7 @@ def test_container_all_env_vars_with_keyword_and_secret(self): }, { "name": ENV_VAR_NAME_WITH_KEYWORD2, - "value": ENV_VAR_VALUE_WITH_SECRETS2, + "value": ENV_VAR_VALUE_GENERIC_SECRET, }, ], } @@ -369,9 +402,24 @@ def test_container_all_env_vars_with_keyword_and_secret(self): result = check.execute() assert len(result) == 1 assert result[0].status == "FAIL" + # DB_PASSWORD holds a JWT under a keyword name, so it fires + # both the JWT and the generic keyword rule on the same line + # (non-deterministic order); DATABASE_PASSWORD fires the generic + # keyword rule on its own line. + assert result[0].status_extended.startswith( + f"Potential secrets found in ECS task definition {TASK_NAME} with revision {TASK_REVISION}: Secrets in container test-container -> " + ) assert ( - result[0].status_extended - == f"Potential secrets found in ECS task definition {TASK_NAME} with revision {TASK_REVISION}: Secrets in container test-container -> Secret Keyword on the environment variable DB_PASSWORD, Basic Auth Credentials on the environment variable DB_PASSWORD, Basic Auth Credentials on the environment variable DATABASE_PASSWORD, Secret Keyword on the environment variable DATABASE_PASSWORD." 
+ "JSON Web Token (base64url-encoded) on the environment variable DB_PASSWORD" + in result[0].status_extended + ) + assert ( + "Generic Password on the environment variable DB_PASSWORD" + in result[0].status_extended + ) + assert ( + "Generic Password on the environment variable DATABASE_PASSWORD" + in result[0].status_extended ) assert result[0].resource_id == f"{TASK_NAME}:{TASK_REVISION}" assert result[0].resource_arn == task_arn diff --git a/tests/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets_test.py b/tests/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets_test.py index 24f0a1fdd1a..096d012e74a 100644 --- a/tests/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets_test.py +++ b/tests/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets_test.py @@ -33,7 +33,7 @@ def test_document_with_secrets(self): arn=document_arn, name=document_name, region=AWS_REGION_US_EAST_1, - content={"db_password": "test-password"}, + content={"db_password": "Tr0ub4dor3xKq9vLmZ"}, account_owners=[], ) } @@ -56,7 +56,7 @@ def test_document_with_secrets(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secret found in SSM Document {document_name} -> Secret Keyword on line 2." + == f"Potential secret found in SSM Document {document_name} -> Generic Password on line 2." 
) def test_document_no_secrets(self): diff --git a/tests/providers/aws/services/stepfunctions/stepfunctions_statemachine_no_secrets_in_definition/stepfunctions_statemachine_no_secrets_in_definition_test.py b/tests/providers/aws/services/stepfunctions/stepfunctions_statemachine_no_secrets_in_definition/stepfunctions_statemachine_no_secrets_in_definition_test.py index 628525e5421..1b1fa1bfcac 100644 --- a/tests/providers/aws/services/stepfunctions/stepfunctions_statemachine_no_secrets_in_definition/stepfunctions_statemachine_no_secrets_in_definition_test.py +++ b/tests/providers/aws/services/stepfunctions/stepfunctions_statemachine_no_secrets_in_definition/stepfunctions_statemachine_no_secrets_in_definition_test.py @@ -147,7 +147,7 @@ def test_statemachine_with_secrets_in_definition(self): arn=statemachine_arn, name="TestStateMachine", status=StateMachineStatus.ACTIVE, - definition='{"Comment": "Example with secret", "StartAt": "MyTask", "States": {"MyTask": {"Type": "Task", "Parameters": {"api_key": "AKIAIOSFODNN7EXAMPLE"}, "End": true}}}', + definition='{"Comment": "Example with secret", "StartAt": "MyTask", "States": {"MyTask": {"Type": "Task", "Parameters": {"api_key": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U"}, "End": true}}}', region=AWS_REGION_US_EAST_1, type=StateMachineType.STANDARD, creation_date=datetime.now(), diff --git a/tests/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data_test.py b/tests/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data_test.py index 6be9ac48f5f..7ed8315dc3e 100644 --- a/tests/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data_test.py +++ 
b/tests/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data_test.py @@ -141,7 +141,7 @@ def test_snapshot_password_in_metadata(self): status="available", size=50, volume_id="vol-1", - metadata={"db_password": "supersecret123"}, + metadata={"db_password": "Tr0ub4dor3xKq9vLmZ"}, project_id=OPENSTACK_PROJECT_ID, region=OPENSTACK_REGION, ) @@ -179,7 +179,9 @@ def test_snapshot_api_key_in_metadata(self): status="available", size=50, volume_id="vol-1", - metadata={"api_key": "sk-1234567890"}, + metadata={ + "api_key": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U" + }, project_id=OPENSTACK_PROJECT_ID, region=OPENSTACK_REGION, ) @@ -223,7 +225,9 @@ def test_snapshot_private_key_in_metadata(self): status="available", size=50, volume_id="vol-1", - metadata={"ssh_key": "-----BEGIN RSA PRIVATE KEY-----"}, + metadata={ + "ssh_key": "-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQCUzlT9QGi8ZSr5\nk+LTRz/1TaiCCs6o1icW4cur0Q0hdBnbRJXUdjlQsgzmBvCBNkGHI8hb/RUPssvc\nDLU5kOQ3Wp2KgtbphhZ2PfpuJrzwHL1ejcJkRxegm/aTdmpoQKcxGeehAfHbmlLA\nxdfn6wPDfGji973yiRH56JRukJAaqF50HC2a/AVNC5HtZoVlbQ+WvVbYVUnPxNkv\nPpc53PjrBgWiTtdMONEqJ3jDiaqfUBt+TZYF0CFc9HgjnUniRX28OukDyLu+idOz\nFKyZxMXtqexkAvQLDW1PATpZgVQ7hJoCD8UVTXAtcgzPq5fA6AR2URiECHI6ZyL0\nUmixKfMNAgMBAAECggEAJRzp5wjdpmEgDQOkjpfGXJ6sAJUD8mmI8cTKeJWIzhdo\nDH8oVEdRJ65kl6lS6hMXWEZlJgYyrsnj3MPBnjQkKycbRCy6P59s8jwmfbsFI+iz\nFUZLXZm6i5jicGhYBRzc5hrlIYu73863RXOClAnSFDsu6K6rzfYASQFIJeRBwJfs\njqXinuun/h2zGjpiY+TtNsa8c+nC7f3sGsTzNJugDvBPWQzsnAMzXJqiyharre4V\no157XIOvdC0joIp8j/Ib1ZtMfz1K1LcgBgw0szSieIw0Rq8yQ0Ek7GtLh43jG+ap\nvcSEesTD1p4mjPXoWkPG8KYd4iwGedZaePfheVcKKQKBgQDNE03SWv18AH0d4fpB\nlFAtRybCfSvMORzBrt2oilz8wDmK+Zga5o+phCnM8v3eJy1v8BvIQ9RvwQA2uVgZ\nr701wNMpVrTsMujk83oVRhimZLk6Hyw07wmMgEHX7+izkm2Lk4Lk7Zol3VRfnWG6\nmIcUk7xB1yAs3mudsfx0VO0QyQKBgQC5wfdqCLj2hZk4sMZu8Bth+BHKChGItmDk\nAW
7aNt+gaPyoryOJoi2OUO8ud8EyuqXiuslSk2pPtjvLhCppkoq6V8kmPAUzaxFk\n4nDEAxT9Un8IJ0j2ebv+koQKsBWjssbVSjrZgIcYIDK1QblgbCp2FSE3ima+V8ip\nOdNjiatWJQKBgEX8lox5nRSanhh6rIuA8DPjmmi5ix7xRs0avm7seXuQppK1R6G2\nmcTCY/mb2+Pa/vi6uuCHtZJGDaqfal+pyCr2GZp8CtapMS4hocJs37C5ozUguld+\nVIXsp4voRkQybsw5lWxHYloVxNu0vEuQDlmJabAWmNZ3OcbhnUSeTyFxAoGAFtkZ\n0owCHChwoT11Gt4jsBgwL/avE27DWigm92Y6eWOQeDsalupAyjmAQenu9Itqrgml\ni6egMu/KSQ0Xnmas86CqmC5XwWxQ9mS31BRA96u2/ky+t7pfej+RSDNCZiEuPbvk\noy4g78G+GvdbktWbH20X6dn3K0Bm6RG4w4yCa5UCgYBs0zAVs0DZmM8SUZJA/HuQ\nN6a1vKKns7xKw5N3SmX1KbDhx5LSZXfbUo2+QktE7iRf9G2f1o0q8kz9l/4AGXi1\nKJNUHupWoaQzGNrzAb27TUtFA0ocMG8KnqxjANWox5oPJS9OU5tw5H5dxeI/Senc\nkYW6eCnRzPcmBqex6Vuw4w==\n-----END PRIVATE KEY-----\n" + }, project_id=OPENSTACK_PROJECT_ID, region=OPENSTACK_REGION, ) @@ -277,7 +281,7 @@ def test_multiple_snapshots_mixed(self): status="available", size=50, volume_id="vol-2", - metadata={"admin_password": "secret123"}, + metadata={"admin_password": "Tr0ub4dor3xKq9vLmZ"}, project_id=OPENSTACK_PROJECT_ID, region=OPENSTACK_REGION, ), @@ -318,7 +322,7 @@ def test_snapshot_metadata_key_correct_identification(self): metadata={ "environment": "production", "application": "web-app", - "db_password": "supersecret123", + "db_password": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U", "region": "us-east", }, project_id=OPENSTACK_PROJECT_ID, diff --git a/tests/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data_test.py b/tests/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data_test.py index e12babb23c4..5ee9eac49db 100644 --- a/tests/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data_test.py +++ 
b/tests/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data_test.py @@ -159,7 +159,7 @@ def test_volume_password_in_metadata(self): is_bootable=False, is_multiattach=False, attachments=[], - metadata={"db_password": "supersecret123"}, + metadata={"db_password": "Tr0ub4dor3xKq9vLmZ"}, availability_zone="nova", snapshot_id="", source_volume_id="", @@ -204,7 +204,9 @@ def test_volume_api_key_in_metadata(self): is_bootable=False, is_multiattach=False, attachments=[], - metadata={"api_key": "sk-1234567890"}, + metadata={ + "api_key": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U" + }, availability_zone="nova", snapshot_id="", source_volume_id="", @@ -255,7 +257,9 @@ def test_volume_private_key_in_metadata(self): is_bootable=False, is_multiattach=False, attachments=[], - metadata={"ssh_key": "-----BEGIN RSA PRIVATE KEY-----"}, + metadata={ + "ssh_key": "-----BEGIN PRIVATE 
KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQCUzlT9QGi8ZSr5\nk+LTRz/1TaiCCs6o1icW4cur0Q0hdBnbRJXUdjlQsgzmBvCBNkGHI8hb/RUPssvc\nDLU5kOQ3Wp2KgtbphhZ2PfpuJrzwHL1ejcJkRxegm/aTdmpoQKcxGeehAfHbmlLA\nxdfn6wPDfGji973yiRH56JRukJAaqF50HC2a/AVNC5HtZoVlbQ+WvVbYVUnPxNkv\nPpc53PjrBgWiTtdMONEqJ3jDiaqfUBt+TZYF0CFc9HgjnUniRX28OukDyLu+idOz\nFKyZxMXtqexkAvQLDW1PATpZgVQ7hJoCD8UVTXAtcgzPq5fA6AR2URiECHI6ZyL0\nUmixKfMNAgMBAAECggEAJRzp5wjdpmEgDQOkjpfGXJ6sAJUD8mmI8cTKeJWIzhdo\nDH8oVEdRJ65kl6lS6hMXWEZlJgYyrsnj3MPBnjQkKycbRCy6P59s8jwmfbsFI+iz\nFUZLXZm6i5jicGhYBRzc5hrlIYu73863RXOClAnSFDsu6K6rzfYASQFIJeRBwJfs\njqXinuun/h2zGjpiY+TtNsa8c+nC7f3sGsTzNJugDvBPWQzsnAMzXJqiyharre4V\no157XIOvdC0joIp8j/Ib1ZtMfz1K1LcgBgw0szSieIw0Rq8yQ0Ek7GtLh43jG+ap\nvcSEesTD1p4mjPXoWkPG8KYd4iwGedZaePfheVcKKQKBgQDNE03SWv18AH0d4fpB\nlFAtRybCfSvMORzBrt2oilz8wDmK+Zga5o+phCnM8v3eJy1v8BvIQ9RvwQA2uVgZ\nr701wNMpVrTsMujk83oVRhimZLk6Hyw07wmMgEHX7+izkm2Lk4Lk7Zol3VRfnWG6\nmIcUk7xB1yAs3mudsfx0VO0QyQKBgQC5wfdqCLj2hZk4sMZu8Bth+BHKChGItmDk\nAW7aNt+gaPyoryOJoi2OUO8ud8EyuqXiuslSk2pPtjvLhCppkoq6V8kmPAUzaxFk\n4nDEAxT9Un8IJ0j2ebv+koQKsBWjssbVSjrZgIcYIDK1QblgbCp2FSE3ima+V8ip\nOdNjiatWJQKBgEX8lox5nRSanhh6rIuA8DPjmmi5ix7xRs0avm7seXuQppK1R6G2\nmcTCY/mb2+Pa/vi6uuCHtZJGDaqfal+pyCr2GZp8CtapMS4hocJs37C5ozUguld+\nVIXsp4voRkQybsw5lWxHYloVxNu0vEuQDlmJabAWmNZ3OcbhnUSeTyFxAoGAFtkZ\n0owCHChwoT11Gt4jsBgwL/avE27DWigm92Y6eWOQeDsalupAyjmAQenu9Itqrgml\ni6egMu/KSQ0Xnmas86CqmC5XwWxQ9mS31BRA96u2/ky+t7pfej+RSDNCZiEuPbvk\noy4g78G+GvdbktWbH20X6dn3K0Bm6RG4w4yCa5UCgYBs0zAVs0DZmM8SUZJA/HuQ\nN6a1vKKns7xKw5N3SmX1KbDhx5LSZXfbUo2+QktE7iRf9G2f1o0q8kz9l/4AGXi1\nKJNUHupWoaQzGNrzAb27TUtFA0ocMG8KnqxjANWox5oPJS9OU5tw5H5dxeI/Senc\nkYW6eCnRzPcmBqex6Vuw4w==\n-----END PRIVATE KEY-----\n" + }, availability_zone="nova", snapshot_id="", source_volume_id="", @@ -323,7 +327,7 @@ def test_multiple_volumes_mixed(self): is_bootable=False, is_multiattach=False, attachments=[], - metadata={"admin_password": "secret123"}, + metadata={"admin_password": "Tr0ub4dor3xKq9vLmZ"}, 
availability_zone="nova", snapshot_id="", source_volume_id="", @@ -371,7 +375,7 @@ def test_volume_metadata_key_correct_identification(self): metadata={ "environment": "production", "application": "web-app", - "db_password": "supersecret123", + "db_password": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U", "region": "us-east", }, availability_zone="nova", diff --git a/tests/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data_test.py b/tests/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data_test.py index 174a8ab83f4..c1c6c99dcd5 100644 --- a/tests/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data_test.py +++ b/tests/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data_test.py @@ -181,7 +181,7 @@ def test_instance_password_in_metadata(self): private_v6="", networks={}, has_config_drive=False, - metadata={"db_password": "supersecret123"}, + metadata={"db_password": "Tr0ub4dor3xKq9vLmZ"}, user_data="", trusted_image_certificates=[], ) @@ -233,7 +233,9 @@ def test_instance_api_key_in_metadata(self): private_v6="", networks={}, has_config_drive=False, - metadata={"api_key": "sk-1234567890"}, + metadata={ + "api_key": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U" + }, user_data="", trusted_image_certificates=[], ) @@ -349,7 +351,9 @@ def test_instance_private_key_in_metadata(self): private_v6="", networks={}, has_config_drive=False, - metadata={"ssh_key": "-----BEGIN RSA PRIVATE KEY-----"}, + metadata={ + "ssh_key": "-----BEGIN PRIVATE 
KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQCUzlT9QGi8ZSr5\nk+LTRz/1TaiCCs6o1icW4cur0Q0hdBnbRJXUdjlQsgzmBvCBNkGHI8hb/RUPssvc\nDLU5kOQ3Wp2KgtbphhZ2PfpuJrzwHL1ejcJkRxegm/aTdmpoQKcxGeehAfHbmlLA\nxdfn6wPDfGji973yiRH56JRukJAaqF50HC2a/AVNC5HtZoVlbQ+WvVbYVUnPxNkv\nPpc53PjrBgWiTtdMONEqJ3jDiaqfUBt+TZYF0CFc9HgjnUniRX28OukDyLu+idOz\nFKyZxMXtqexkAvQLDW1PATpZgVQ7hJoCD8UVTXAtcgzPq5fA6AR2URiECHI6ZyL0\nUmixKfMNAgMBAAECggEAJRzp5wjdpmEgDQOkjpfGXJ6sAJUD8mmI8cTKeJWIzhdo\nDH8oVEdRJ65kl6lS6hMXWEZlJgYyrsnj3MPBnjQkKycbRCy6P59s8jwmfbsFI+iz\nFUZLXZm6i5jicGhYBRzc5hrlIYu73863RXOClAnSFDsu6K6rzfYASQFIJeRBwJfs\njqXinuun/h2zGjpiY+TtNsa8c+nC7f3sGsTzNJugDvBPWQzsnAMzXJqiyharre4V\no157XIOvdC0joIp8j/Ib1ZtMfz1K1LcgBgw0szSieIw0Rq8yQ0Ek7GtLh43jG+ap\nvcSEesTD1p4mjPXoWkPG8KYd4iwGedZaePfheVcKKQKBgQDNE03SWv18AH0d4fpB\nlFAtRybCfSvMORzBrt2oilz8wDmK+Zga5o+phCnM8v3eJy1v8BvIQ9RvwQA2uVgZ\nr701wNMpVrTsMujk83oVRhimZLk6Hyw07wmMgEHX7+izkm2Lk4Lk7Zol3VRfnWG6\nmIcUk7xB1yAs3mudsfx0VO0QyQKBgQC5wfdqCLj2hZk4sMZu8Bth+BHKChGItmDk\nAW7aNt+gaPyoryOJoi2OUO8ud8EyuqXiuslSk2pPtjvLhCppkoq6V8kmPAUzaxFk\n4nDEAxT9Un8IJ0j2ebv+koQKsBWjssbVSjrZgIcYIDK1QblgbCp2FSE3ima+V8ip\nOdNjiatWJQKBgEX8lox5nRSanhh6rIuA8DPjmmi5ix7xRs0avm7seXuQppK1R6G2\nmcTCY/mb2+Pa/vi6uuCHtZJGDaqfal+pyCr2GZp8CtapMS4hocJs37C5ozUguld+\nVIXsp4voRkQybsw5lWxHYloVxNu0vEuQDlmJabAWmNZ3OcbhnUSeTyFxAoGAFtkZ\n0owCHChwoT11Gt4jsBgwL/avE27DWigm92Y6eWOQeDsalupAyjmAQenu9Itqrgml\ni6egMu/KSQ0Xnmas86CqmC5XwWxQ9mS31BRA96u2/ky+t7pfej+RSDNCZiEuPbvk\noy4g78G+GvdbktWbH20X6dn3K0Bm6RG4w4yCa5UCgYBs0zAVs0DZmM8SUZJA/HuQ\nN6a1vKKns7xKw5N3SmX1KbDhx5LSZXfbUo2+QktE7iRf9G2f1o0q8kz9l/4AGXi1\nKJNUHupWoaQzGNrzAb27TUtFA0ocMG8KnqxjANWox5oPJS9OU5tw5H5dxeI/Senc\nkYW6eCnRzPcmBqex6Vuw4w==\n-----END PRIVATE KEY-----\n" + }, user_data="", trusted_image_certificates=[], ) @@ -431,7 +435,7 @@ def test_multiple_instances_mixed(self): private_v6="", networks={}, has_config_drive=False, - metadata={"admin_password": "secret123"}, + metadata={"admin_password": "Tr0ub4dor3xKq9vLmZ"}, user_data="", 
trusted_image_certificates=[], ), @@ -486,7 +490,7 @@ def test_instance_multiple_metadata_keys_correct_identification(self): metadata={ "environment": "production", "application": "web-app", - "db_password": "supersecret123", + "db_password": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U", "region": "us-east", }, user_data="", @@ -544,7 +548,7 @@ def test_instance_metadata_key_ordering(self): has_config_drive=False, metadata={ "first_key": "safe_value", - "api_key": "sk-1234567890abcdef", + "api_key": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U", "third_key": "also_safe", }, user_data="", diff --git a/tests/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data_test.py b/tests/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data_test.py index 6cae6432c73..96eb55caf0c 100644 --- a/tests/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data_test.py +++ b/tests/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data_test.py @@ -157,7 +157,7 @@ def test_container_password_in_metadata(self): history_location="", sync_to="", sync_key="", - metadata={"db_password": "supersecret123"}, + metadata={"db_password": "Tr0ub4dor3xKq9vLmZ"}, ) ] @@ -217,7 +217,7 @@ def test_multiple_containers_mixed(self): history_location="", sync_to="", sync_key="", - metadata={"admin_password": "secret123"}, + metadata={"admin_password": "Tr0ub4dor3xKq9vLmZ"}, ), ] From 8eec9325e9b31db1c96cd103742204a5133bbfe1 Mon Sep 17 00:00:00 2001 From: Daniel Barranquero Date: Wed, 24 Jun 2026 17:51:21 +0200 Subject: [PATCH 04/20] docs: document 
kingfisher secret scanning and validation --- .../cli/tutorials/configuration_file.mdx | 28 +++++++++++++++++++ docs/user-guide/cli/tutorials/pentesting.mdx | 4 +-- prowler/CHANGELOG.md | 12 ++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/docs/user-guide/cli/tutorials/configuration_file.mdx b/docs/user-guide/cli/tutorials/configuration_file.mdx index 657549f8b0b..bf6a48eabe6 100644 --- a/docs/user-guide/cli/tutorials/configuration_file.mdx +++ b/docs/user-guide/cli/tutorials/configuration_file.mdx @@ -87,6 +87,34 @@ The following list includes all the AWS checks with configurable variables that | `opensearch_service_domains_not_publicly_accessible` | `trusted_ips` | List of Strings | +### Validating Discovered Secrets + +import { VersionBadge } from "/snippets/version-badge.mdx" + + + +By default, the secret-scanning checks run fully offline: secrets are detected but never sent anywhere. Setting `secrets_validate` to `True` additionally confirms whether each discovered secret is live by authenticating with it against the corresponding provider API. The discovered secret itself serves as the credential, so Prowler requires no additional permissions to validate it. + +`secrets_validate` applies to every AWS secret-scanning check listed above (those that accept `secrets_ignore_patterns`). + +To enable validation through the configuration file, set the value under the `aws` section: + +```yaml +aws: + secrets_validate: True +``` + +To enable validation for a single scan, use Prowler CLI: + +``` +prowler aws --scan-secrets-validate +``` + + +Secret validation makes outbound network calls that authenticate with each discovered secret. The credential is exercised against the provider, so the call appears in the audited account's logs and can trigger its monitoring (for example, AWS CloudTrail records the validation request). Validation stays disabled by default so that scans remain fully offline. 
+ + + ## Azure ### Configurable Checks diff --git a/docs/user-guide/cli/tutorials/pentesting.mdx b/docs/user-guide/cli/tutorials/pentesting.mdx index 0ad53136113..a0aa628b536 100644 --- a/docs/user-guide/cli/tutorials/pentesting.mdx +++ b/docs/user-guide/cli/tutorials/pentesting.mdx @@ -6,7 +6,7 @@ Prowler has some checks that analyse pentesting risks (Secrets, Internet Exposed ## Detect Secrets -Prowler uses `detect-secrets` library to search for any secrets that are stores in plaintext within your environment. +Prowler scans for secrets stored in plaintext within the audited environment using [Kingfisher](https://github.com/mongodb/kingfisher), an open-source secret-scanning engine. By default these scans run fully offline, so no data leaves the audited environment. Discovered secrets can optionally be validated against the provider APIs to confirm whether they are live — see [Validating Discovered Secrets](/user-guide/cli/tutorials/configuration_file#validating-discovered-secrets). The actual checks that have this functionality are the following: @@ -19,7 +19,7 @@ The actual checks that have this functionality are the following: - ecs\_task\_definitions\_no\_environment\_secrets - ssm\_document\_secrets -To execute detect-secrets related checks, you can run the following command: +To execute the secret-scanning checks, run the following command: ```console prowler --categories secrets diff --git a/prowler/CHANGELOG.md b/prowler/CHANGELOG.md index b040b32a78d..9b7dc598eeb 100644 --- a/prowler/CHANGELOG.md +++ b/prowler/CHANGELOG.md @@ -2,6 +2,18 @@ All notable changes to the **Prowler SDK** are documented in this file. 
+## [5.32.0] (Prowler UNRELEASED) + +### 🚀 Added + +- `--scan-secrets-validate` flag and `secrets_validate` configuration option to optionally validate the secrets discovered by the secret-scanning checks against the provider APIs; secrets confirmed to be live are reported as critical [(#XXXXX)](https://github.com/prowler-cloud/prowler/pull/XXXXX) + +### 🔄 Changed + +- Replaced the `detect-secrets` library with [Kingfisher](https://github.com/mongodb/kingfisher) as the engine for the secret-scanning checks; scans run fully offline by default and obvious placeholder values are no longer reported as findings [(#XXXXX)](https://github.com/prowler-cloud/prowler/pull/XXXXX) + +--- + ## [5.31.0] (Prowler v5.31.0) ### 🚀 Added From 6f418cb0c31fba09bfe428b3fee83944b72bba66 Mon Sep 17 00:00:00 2001 From: Daniel Barranquero Date: Thu, 25 Jun 2026 09:27:35 +0200 Subject: [PATCH 05/20] chore: add secrets flag into aws schema --- prowler/config/schema/aws.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/prowler/config/schema/aws.py b/prowler/config/schema/aws.py index 1a44bb9bcc7..c34cf1d42cb 100644 --- a/prowler/config/schema/aws.py +++ b/prowler/config/schema/aws.py @@ -394,6 +394,14 @@ class AWSProviderConfig(ProviderConfigBase): # --- Secrets --------------------------------------------------------- secrets_ignore_patterns: Optional[list[str]] = None + secrets_validate: Optional[bool] = Field( + default=None, + description=( + "Validate discovered secrets against the provider APIs (live check). " + "Makes outbound network calls that authenticate with the discovered " + "secret. Disabled by default." 
+ ), + ) max_days_secret_unused: Optional[int] = Field( default=None, ge=7, From 5e54451af3178e4d009343fa1e9a9656c348bdfb Mon Sep 17 00:00:00 2001 From: Daniel Barranquero Date: Thu, 25 Jun 2026 10:30:17 +0200 Subject: [PATCH 06/20] chore(sdk): remove deprecated detect_secrets_plugins config option --- docs/developer-guide/configurable-checks.mdx | 1 - prowler/CHANGELOG.md | 1 + prowler/config/config.yaml | 31 ------------- prowler/config/schema/aws.py | 26 ----------- ...g_find_secrets_ec2_launch_configuration.py | 3 -- .../awslambda_function_no_secrets_in_code.py | 3 -- ...lambda_function_no_secrets_in_variables.py | 3 -- ...oudformation_stack_outputs_find_secrets.py | 3 -- ...debuild_project_no_secrets_in_variables.py | 3 -- .../ec2_instance_secrets_user_data.py | 3 -- .../ec2_launch_template_no_secrets.py | 3 -- ...task_definitions_no_environment_secrets.py | 3 -- .../glue_etl_jobs_no_secrets_in_arguments.py | 3 -- .../ssm_document_secrets.py | 3 -- ...s_statemachine_no_secrets_in_definition.py | 3 -- ...shot_metadata_sensitive_data.metadata.json | 2 +- ...torage_snapshot_metadata_sensitive_data.py | 3 -- ...lume_metadata_sensitive_data.metadata.json | 2 +- ...kstorage_volume_metadata_sensitive_data.py | 3 -- ...ance_metadata_sensitive_data.metadata.json | 2 +- ...ompute_instance_metadata_sensitive_data.py | 3 -- ...iner_metadata_sensitive_data.metadata.json | 2 +- ...orage_container_metadata_sensitive_data.py | 3 -- tests/config/schema/aws_schema_test.py | 45 ------------------- tests/config/schema/bounds_test.py | 32 ------------- .../ec2_launch_template_no_secrets_test.py | 8 ++-- .../ssm_document_secrets_test.py | 2 +- 27 files changed, 10 insertions(+), 189 deletions(-) diff --git a/docs/developer-guide/configurable-checks.mdx b/docs/developer-guide/configurable-checks.mdx index 760dc80f586..0cdbaff84b0 100644 --- a/docs/developer-guide/configurable-checks.mdx +++ b/docs/developer-guide/configurable-checks.mdx @@ -149,7 +149,6 @@ Only fields with a numeric 
range, a fixed value set, or a length cap are listed. | `max_days_secret_unused` | `7..365` days | | | `max_days_secret_unrotated` | `1..180` days | NIST IA-5: rotate quarterly; CIS ≤90 | | `min_kinesis_stream_retention_hours` | `24..8760` h | 1 day .. 1 year | -| `detect_secrets_plugins[].limit` | `0.0..10.0` | Shannon entropy threshold | | `shodan_api_key` | ≤512 chars | | ### Azure diff --git a/prowler/CHANGELOG.md b/prowler/CHANGELOG.md index 572b6609771..da095a16e23 100644 --- a/prowler/CHANGELOG.md +++ b/prowler/CHANGELOG.md @@ -13,6 +13,7 @@ All notable changes to the **Prowler SDK** are documented in this file. ### 🔄 Changed - Replaced the `detect-secrets` library with [Kingfisher](https://github.com/mongodb/kingfisher) as the engine for the secret-scanning checks; scans run fully offline by default and obvious placeholder values are no longer reported as findings [(#11694)](https://github.com/prowler-cloud/prowler/pull/11694) +- Removed the `detect_secrets_plugins` configuration option, which is no longer used by the new secret-scanning engine [(#11694)](https://github.com/prowler-cloud/prowler/pull/11694) --- diff --git a/prowler/config/config.yaml b/prowler/config/config.yaml index 78309786ef2..9d8695b8495 100644 --- a/prowler/config/config.yaml +++ b/prowler/config/config.yaml @@ -443,37 +443,6 @@ aws: # Minimum retention period in hours for Kinesis streams min_kinesis_stream_retention_hours: 168 # 7 days - # Detect Secrets plugin configuration - detect_secrets_plugins: [ - {"name": "ArtifactoryDetector"}, - {"name": "AWSKeyDetector"}, - {"name": "AzureStorageKeyDetector"}, - {"name": "BasicAuthDetector"}, - {"name": "CloudantDetector"}, - {"name": "DiscordBotTokenDetector"}, - {"name": "GitHubTokenDetector"}, - {"name": "GitLabTokenDetector"}, - {"name": "Base64HighEntropyString", "limit": 6.0}, - {"name": "HexHighEntropyString", "limit": 3.0}, - {"name": "IbmCloudIamDetector"}, - {"name": "IbmCosHmacDetector"}, - # {"name": "IPPublicDetector"}, 
https://github.com/Yelp/detect-secrets/pull/885 - {"name": "JwtTokenDetector"}, - {"name": "KeywordDetector"}, - {"name": "MailchimpDetector"}, - {"name": "NpmDetector"}, - {"name": "OpenAIDetector"}, - {"name": "PrivateKeyDetector"}, - {"name": "PypiTokenDetector"}, - {"name": "SendGridDetector"}, - {"name": "SlackDetector"}, - {"name": "SoftlayerDetector"}, - {"name": "SquareOAuthDetector"}, - {"name": "StripeDetector"}, - # {"name": "TelegramBotTokenDetector"}, https://github.com/Yelp/detect-secrets/pull/878 - {"name": "TwilioKeyDetector"}, - ] - # AWS CodeBuild Configuration # aws.codebuild_project_uses_allowed_github_organizations codebuild_github_allowed_organizations: diff --git a/prowler/config/schema/aws.py b/prowler/config/schema/aws.py index c34cf1d42cb..d15fc276c5c 100644 --- a/prowler/config/schema/aws.py +++ b/prowler/config/schema/aws.py @@ -101,29 +101,6 @@ def _validate_account_ids(v: Optional[list[str]]) -> Optional[list[str]]: return v -# ---- Nested models ---------------------------------------------------------- - - -class _DetectSecretsPlugin(ProviderConfigBase): - """One entry inside ``detect_secrets_plugins``. - - Only ``name`` is required by the upstream library. ``limit`` is used by - the entropy detectors. Any other plugin-specific kwarg is preserved by - the ``extra="allow"`` policy inherited from ProviderConfigBase. - """ - - name: str - limit: Optional[float] = Field( - default=None, - ge=0.0, - le=10.0, - description=( - "Entropy threshold for detect-secrets entropy plugins. Range: 0..10 " - "(Shannon entropy is bounded by log2(256)=8; >10 is meaningless)." - ), - ) - - # ---- Main schema ------------------------------------------------------------ @@ -425,6 +402,3 @@ class AWSProviderConfig(ProviderConfigBase): le=8760, description="Hours of Kinesis stream retention. Range: 24..8760 (1 day .. 
1 year).", ) - - # --- detect-secrets plugin list ------------------------------------- - detect_secrets_plugins: Optional[list[_DetectSecretsPlugin]] = None diff --git a/prowler/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration.py b/prowler/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration.py index 209e54ab078..e4f7b15d855 100644 --- a/prowler/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration.py +++ b/prowler/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration.py @@ -45,9 +45,6 @@ def execute(self): has_secrets = detect_secrets_scan( data=user_data, excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=autoscaling_client.audit_config.get( - "detect_secrets_plugins" - ), validate=autoscaling_client.audit_config.get( "secrets_validate", False ), diff --git a/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_code/awslambda_function_no_secrets_in_code.py b/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_code/awslambda_function_no_secrets_in_code.py index b4350a37d9a..fc8795c6884 100644 --- a/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_code/awslambda_function_no_secrets_in_code.py +++ b/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_code/awslambda_function_no_secrets_in_code.py @@ -33,9 +33,6 @@ def execute(self): detect_secrets_output = detect_secrets_scan( file=f"{tmp_dir_name}/{file}", excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=awslambda_client.audit_config.get( - "detect_secrets_plugins", - ), validate=awslambda_client.audit_config.get( "secrets_validate", False ), diff --git 
a/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables.py b/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables.py index 2a3ec0e6f63..280749397d0 100644 --- a/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables.py +++ b/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables.py @@ -23,9 +23,6 @@ def execute(self): detect_secrets_output = detect_secrets_scan( data=json.dumps(function.environment, indent=2), excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=awslambda_client.audit_config.get( - "detect_secrets_plugins", - ), validate=awslambda_client.audit_config.get( "secrets_validate", False ), diff --git a/prowler/providers/aws/services/cloudformation/cloudformation_stack_outputs_find_secrets/cloudformation_stack_outputs_find_secrets.py b/prowler/providers/aws/services/cloudformation/cloudformation_stack_outputs_find_secrets/cloudformation_stack_outputs_find_secrets.py index 89919589139..71a835e2c20 100644 --- a/prowler/providers/aws/services/cloudformation/cloudformation_stack_outputs_find_secrets/cloudformation_stack_outputs_find_secrets.py +++ b/prowler/providers/aws/services/cloudformation/cloudformation_stack_outputs_find_secrets/cloudformation_stack_outputs_find_secrets.py @@ -29,9 +29,6 @@ def execute(self): detect_secrets_output = detect_secrets_scan( data=data, excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=cloudformation_client.audit_config.get( - "detect_secrets_plugins", - ), validate=cloudformation_client.audit_config.get( "secrets_validate", False ), diff --git a/prowler/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables.py 
b/prowler/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables.py index 4689fa32a7f..5416dd3fd08 100644 --- a/prowler/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables.py +++ b/prowler/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables.py @@ -30,9 +30,6 @@ def execute(self): detect_secrets_output = detect_secrets_scan( data=json.dumps({env_var.name: env_var.value}), excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=codebuild_client.audit_config.get( - "detect_secrets_plugins", - ), validate=codebuild_client.audit_config.get( "secrets_validate", False ), diff --git a/prowler/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data.py b/prowler/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data.py index c991818841b..f737bfa923c 100644 --- a/prowler/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data.py +++ b/prowler/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data.py @@ -39,9 +39,6 @@ def execute(self): detect_secrets_output = detect_secrets_scan( data=user_data, excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=ec2_client.audit_config.get( - "detect_secrets_plugins" - ), validate=ec2_client.audit_config.get("secrets_validate", False), ) if detect_secrets_output: diff --git a/prowler/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets.py b/prowler/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets.py index 3f349720d99..d98ba60e28a 100644 --- a/prowler/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets.py +++ 
b/prowler/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets.py @@ -46,9 +46,6 @@ def execute(self): version_secrets = detect_secrets_scan( data=user_data, excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=ec2_client.audit_config.get( - "detect_secrets_plugins" - ), validate=ec2_client.audit_config.get("secrets_validate", False), ) diff --git a/prowler/providers/aws/services/ecs/ecs_task_definitions_no_environment_secrets/ecs_task_definitions_no_environment_secrets.py b/prowler/providers/aws/services/ecs/ecs_task_definitions_no_environment_secrets/ecs_task_definitions_no_environment_secrets.py index 2a1ca2f2dfd..283d5decd6d 100644 --- a/prowler/providers/aws/services/ecs/ecs_task_definitions_no_environment_secrets/ecs_task_definitions_no_environment_secrets.py +++ b/prowler/providers/aws/services/ecs/ecs_task_definitions_no_environment_secrets/ecs_task_definitions_no_environment_secrets.py @@ -34,9 +34,6 @@ def execute(self): detect_secrets_output = detect_secrets_scan( data=env_data, excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=ecs_client.audit_config.get( - "detect_secrets_plugins", - ), validate=ecs_client.audit_config.get("secrets_validate", False), ) if detect_secrets_output: diff --git a/prowler/providers/aws/services/glue/glue_etl_jobs_no_secrets_in_arguments/glue_etl_jobs_no_secrets_in_arguments.py b/prowler/providers/aws/services/glue/glue_etl_jobs_no_secrets_in_arguments/glue_etl_jobs_no_secrets_in_arguments.py index a7848032843..4b6c920cc4a 100644 --- a/prowler/providers/aws/services/glue/glue_etl_jobs_no_secrets_in_arguments/glue_etl_jobs_no_secrets_in_arguments.py +++ b/prowler/providers/aws/services/glue/glue_etl_jobs_no_secrets_in_arguments/glue_etl_jobs_no_secrets_in_arguments.py @@ -32,9 +32,6 @@ def execute(self): detect_secrets_output = detect_secrets_scan( data=json.dumps({arg_name: arg_value}), excluded_secrets=secrets_ignore_patterns, - 
detect_secrets_plugins=glue_client.audit_config.get( - "detect_secrets_plugins", - ), validate=glue_client.audit_config.get( "secrets_validate", False ), diff --git a/prowler/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets.py b/prowler/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets.py index 42a04e223f8..09bec239fb6 100644 --- a/prowler/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets.py +++ b/prowler/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets.py @@ -22,9 +22,6 @@ def execute(self): detect_secrets_output = detect_secrets_scan( data=json.dumps(document.content, indent=2), excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=ssm_client.audit_config.get( - "detect_secrets_plugins" - ), validate=ssm_client.audit_config.get("secrets_validate", False), ) if detect_secrets_output: diff --git a/prowler/providers/aws/services/stepfunctions/stepfunctions_statemachine_no_secrets_in_definition/stepfunctions_statemachine_no_secrets_in_definition.py b/prowler/providers/aws/services/stepfunctions/stepfunctions_statemachine_no_secrets_in_definition/stepfunctions_statemachine_no_secrets_in_definition.py index 5c39215f748..72bc06c84f8 100644 --- a/prowler/providers/aws/services/stepfunctions/stepfunctions_statemachine_no_secrets_in_definition/stepfunctions_statemachine_no_secrets_in_definition.py +++ b/prowler/providers/aws/services/stepfunctions/stepfunctions_statemachine_no_secrets_in_definition/stepfunctions_statemachine_no_secrets_in_definition.py @@ -22,9 +22,6 @@ def execute(self) -> list[Check_Report_AWS]: detect_secrets_output = detect_secrets_scan( data=state_machine.definition, excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=stepfunctions_client.audit_config.get( - "detect_secrets_plugins", - ), validate=stepfunctions_client.audit_config.get( "secrets_validate", False ), diff --git 
a/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.metadata.json b/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.metadata.json index 597d3ab4d4d..60043831cbd 100644 --- a/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.metadata.json +++ b/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.metadata.json @@ -36,5 +36,5 @@ "RelatedTo": [ "blockstorage_volume_metadata_sensitive_data" ], - "Notes": "This check uses the detect-secrets library to scan for credentials. May produce false positives on metadata keys containing secret-like keywords. Findings should be reviewed manually. The audit_config allows configuring secrets_ignore_patterns to exclude specific patterns and detect_secrets_plugins to customize detection." + "Notes": "This check uses Kingfisher to scan for credentials. May produce false positives on metadata keys containing secret-like keywords. Findings should be reviewed manually. The audit_config allows configuring secrets_ignore_patterns to exclude specific patterns."
} diff --git a/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.py b/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.py index d072d0164c6..e0cee0bc735 100644 --- a/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.py +++ b/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.py @@ -35,9 +35,6 @@ def execute(self) -> List[CheckReportOpenStack]: detect_secrets_output = detect_secrets_scan( data=metadata_json, excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=blockstorage_client.audit_config.get( - "detect_secrets_plugins" - ), validate=blockstorage_client.audit_config.get( "secrets_validate", False ), diff --git a/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.metadata.json b/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.metadata.json index ec17ee02d1b..cda2d8f89ef 100644 --- a/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.metadata.json +++ b/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.metadata.json @@ -34,5 +34,5 @@ ], "DependsOn": [], "RelatedTo": [], - "Notes": "This check uses the detect-secrets library to scan for credentials. May produce false positives on metadata keys containing secret-like keywords. Findings should be reviewed manually. 
The audit_config allows configuring secrets_ignore_patterns to exclude specific patterns and detect_secrets_plugins to customize detection." + "Notes": "This check uses Kingfisher to scan for credentials. May produce false positives on metadata keys containing secret-like keywords. Findings should be reviewed manually. The audit_config allows configuring secrets_ignore_patterns to exclude specific patterns." } diff --git a/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.py b/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.py index 7cc07fd1d98..dd95d28ec80 100644 --- a/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.py +++ b/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.py @@ -35,9 +35,6 @@ def execute(self) -> List[CheckReportOpenStack]: detect_secrets_output = detect_secrets_scan( data=metadata_json, excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=blockstorage_client.audit_config.get( - "detect_secrets_plugins" - ), validate=blockstorage_client.audit_config.get( "secrets_validate", False ), diff --git a/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.metadata.json b/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.metadata.json index 015a00986d4..89d82e7a484 100644 --- a/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.metadata.json +++
b/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.metadata.json @@ -34,5 +34,5 @@ ], "DependsOn": [], "RelatedTo": [], - "Notes": "This check uses the detect-secrets library to scan for credentials. May produce false positives on metadata keys containing secret-like keywords. Findings should be reviewed manually. The audit_config allows configuring secrets_ignore_patterns to exclude specific patterns and detect_secrets_plugins to customize detection. Metadata is world-readable within instance via 169.254.169.254." + "Notes": "This check uses Kingfisher to scan for credentials. May produce false positives on metadata keys containing secret-like keywords. Findings should be reviewed manually. The audit_config allows configuring secrets_ignore_patterns to exclude specific patterns. Metadata is world-readable within instance via 169.254.169.254." } diff --git a/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.py b/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.py index 6e0ec2b206b..8a0b9740db6 100644 --- a/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.py +++ b/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.py @@ -33,9 +33,6 @@ def execute(self) -> List[CheckReportOpenStack]: detect_secrets_output = detect_secrets_scan( data=metadata_json, excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=compute_client.audit_config.get( - "detect_secrets_plugins" - ), validate=compute_client.audit_config.get("secrets_validate", False), ) diff --git
a/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.metadata.json b/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.metadata.json index b3a39bd3f8a..36055362cb0 100644 --- a/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.metadata.json +++ b/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.metadata.json @@ -35,5 +35,5 @@ ], "DependsOn": [], "RelatedTo": [], - "Notes": "This check uses the detect-secrets library to scan for credentials. May produce false positives on metadata keys containing secret-like keywords. Findings should be reviewed manually. The audit_config allows configuring secrets_ignore_patterns to exclude specific patterns and detect_secrets_plugins to customize detection." + "Notes": "This check uses Kingfisher to scan for credentials. May produce false positives on metadata keys containing secret-like keywords. Findings should be reviewed manually. The audit_config allows configuring secrets_ignore_patterns to exclude specific patterns."
} diff --git a/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.py b/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.py index a4410712709..ae7b1fc00e4 100644 --- a/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.py +++ b/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.py @@ -37,9 +37,6 @@ def execute(self) -> List[CheckReportOpenStack]: detect_secrets_output = detect_secrets_scan( data=metadata_json, excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=objectstorage_client.audit_config.get( - "detect_secrets_plugins" - ), validate=objectstorage_client.audit_config.get( "secrets_validate", False ), diff --git a/tests/config/schema/aws_schema_test.py b/tests/config/schema/aws_schema_test.py index ad08e84e3be..8731e08ba9c 100644 --- a/tests/config/schema/aws_schema_test.py +++ b/tests/config/schema/aws_schema_test.py @@ -129,47 +129,6 @@ def test_invalid_severity_levels_are_dropped(self, level): assert _validate({"ecr_repository_vulnerability_minimum_severity": level}) == {} -class Test_AWS_Detect_Secrets_Plugins: - def test_plugin_without_limit(self): - out = _validate({"detect_secrets_plugins": [{"name": "AWSKeyDetector"}]}) - assert out == {"detect_secrets_plugins": [{"name": "AWSKeyDetector"}]} - - def test_plugin_with_limit(self): - out = _validate( - { - "detect_secrets_plugins": [ - {"name": "Base64HighEntropyString", "limit": 6.0} - ] - } - ) - assert out == { - "detect_secrets_plugins": [ - {"name": "Base64HighEntropyString", "limit": 6.0} - ] - } - - def test_plugin_missing_name_drops_whole_field(self): - # ``name`` is required by the upstream library. 
- out = _validate({"detect_secrets_plugins": [{"limit": 6.0}]}) - assert out == {} - - def test_extra_plugin_kwargs_pass_through(self): - # Plugins can have arbitrary extra params (extra="allow" on the - # nested model). They must round-trip. - out = _validate( - { - "detect_secrets_plugins": [ - {"name": "Custom", "my_param": "abc", "other": 42} - ] - } - ) - assert out == { - "detect_secrets_plugins": [ - {"name": "Custom", "my_param": "abc", "other": 42} - ] - } - - class Test_AWS_Booleans: @pytest.mark.parametrize( "key", @@ -214,9 +173,5 @@ def test_full_default_config_round_trip(self): "threat_detection_enumeration_threshold": 0.3, "threat_detection_llm_jacking_threshold": 0.4, "ec2_high_risk_ports": [25, 110, 8088], - "detect_secrets_plugins": [ - {"name": "AWSKeyDetector"}, - {"name": "Base64HighEntropyString", "limit": 6.0}, - ], } assert _validate(raw) == raw diff --git a/tests/config/schema/bounds_test.py b/tests/config/schema/bounds_test.py index 0e5cad6056f..f29e73681ae 100644 --- a/tests/config/schema/bounds_test.py +++ b/tests/config/schema/bounds_test.py @@ -330,38 +330,6 @@ def test_invalid_rejected(self, value): assert _has_error_for(errors, "aws.trusted_ips") -class TestDetectSecretsEntropyBound: - """`detect_secrets_plugins[].limit` is Shannon entropy: 0..10.""" - - @pytest.mark.parametrize("value", [0.0, 3.5, 4.5, 8.0, 10.0]) - def test_valid(self, value): - assert ( - validate_scan_config( - { - "aws": { - "detect_secrets_plugins": [ - {"name": "Base64HighEntropyString", "limit": value} - ] - } - } - ) - == [] - ) - - @pytest.mark.parametrize("value", [-0.1, 10.01, 50]) - def test_invalid(self, value): - errors = validate_scan_config( - { - "aws": { - "detect_secrets_plugins": [ - {"name": "Base64HighEntropyString", "limit": value} - ] - } - } - ) - assert _has_error_for(errors, "aws.detect_secrets_plugins") - - class TestAdapterRobustness: """Top-level adapter behaviour the Prowler App backend depends on.""" diff --git 
a/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets_test.py b/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets_test.py index e0f66ad534c..3cf7655585f 100644 --- a/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets_test.py +++ b/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets_test.py @@ -214,7 +214,7 @@ def test_one_launch_template_with_secrets_in_multiple_versions(self): ) ec2_client.launch_templates = [launch_template] - ec2_client.audit_config = {"detect_secrets_plugins": None} + ec2_client.audit_config = {} with ( mock.patch( @@ -292,7 +292,7 @@ def test_one_launch_template_with_secrets_in_single_version(self): ) ec2_client.launch_templates = [launch_template] - ec2_client.audit_config = {"detect_secrets_plugins": None} + ec2_client.audit_config = {} with ( mock.patch( @@ -360,7 +360,7 @@ def test_one_launch_template_with_secrets_gzip(self): ) ec2_client.launch_templates = [launch_template] - ec2_client.audit_config = {"detect_secrets_plugins": None} + ec2_client.audit_config = {} with ( mock.patch( @@ -508,7 +508,7 @@ def test_two_launch_templates_one_template_with_secrets(self): launch_template_secrets, launch_template_no_secrets, ] - ec2_client.audit_config = {"detect_secrets_plugins": None} + ec2_client.audit_config = {} with ( mock.patch( diff --git a/tests/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets_test.py b/tests/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets_test.py index 096d012e74a..9fc6de4762a 100644 --- a/tests/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets_test.py +++ b/tests/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets_test.py @@ -27,7 +27,7 @@ def test_document_with_secrets(self): document_name = "test-document" document_arn = 
f"arn:aws:ssm:{AWS_REGION_US_EAST_1}:{AWS_ACCOUNT_NUMBER}:document/{document_name}" ssm_client.audited_account = AWS_ACCOUNT_NUMBER - ssm_client.audit_config = {"detect_secrets_plugins": None} + ssm_client.audit_config = {} ssm_client.documents = { document_name: Document( arn=document_arn, From 5e4ddf442baf75801b8be4314a40bc49008a8150 Mon Sep 17 00:00:00 2001 From: Daniel Barranquero Date: Thu, 25 Jun 2026 10:30:23 +0200 Subject: [PATCH 07/20] fix(sdk): skip secret re-validation in cloudwatch line-number rescan --- .../cloudwatch_log_group_no_secrets_in_logs.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/prowler/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs.py b/prowler/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs.py index de1a2f9946b..2b92035aa4a 100644 --- a/prowler/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs.py +++ b/prowler/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs.py @@ -35,9 +35,6 @@ def execute(self): log_stream_secrets_output = detect_secrets_scan( data=log_stream_data, excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=logs_client.audit_config.get( - "detect_secrets_plugins", - ), validate=logs_client.audit_config.get( "secrets_validate", False ), @@ -75,12 +72,7 @@ def execute(self): # Will rescan just this event to get the type of secret and the line number event_detect_secrets_output = detect_secrets_scan( data=log_event_data, - detect_secrets_plugins=logs_client.audit_config.get( - "detect_secrets_plugins" - ), - validate=logs_client.audit_config.get( - "secrets_validate", False - ), + validate=False, ) if event_detect_secrets_output: for secret in event_detect_secrets_output: From 26d9f616938f5dba30619f196c985a3ff0b9bfe9 Mon Sep 
17 00:00:00 2001 From: Daniel Barranquero Date: Thu, 25 Jun 2026 10:30:25 +0200 Subject: [PATCH 08/20] docs: complete secret-scanning check list and note provider-wide validation --- .../cli/tutorials/configuration_file.mdx | 4 ++-- docs/user-guide/cli/tutorials/pentesting.mdx | 15 ++++++++++++++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/docs/user-guide/cli/tutorials/configuration_file.mdx b/docs/user-guide/cli/tutorials/configuration_file.mdx index bf6a48eabe6..3aee9955d8a 100644 --- a/docs/user-guide/cli/tutorials/configuration_file.mdx +++ b/docs/user-guide/cli/tutorials/configuration_file.mdx @@ -95,7 +95,7 @@ import { VersionBadge } from "/snippets/version-badge.mdx" By default, the secret-scanning checks run fully offline: secrets are detected but never sent anywhere. Setting `secrets_validate` to `True` additionally confirms whether each discovered secret is live by authenticating with it against the corresponding provider API. The discovered secret itself serves as the credential, so Prowler requires no additional permissions to validate it. -`secrets_validate` applies to every AWS secret-scanning check listed above (those that accept `secrets_ignore_patterns`). +`secrets_validate` applies to every AWS secret-scanning check listed above (those that accept `secrets_ignore_patterns`). The `--scan-secrets-validate` CLI flag is provider-wide: it also enables validation for the secret-scanning checks of other providers, such as the OpenStack metadata checks. 
To enable validation through the configuration file, set the value under the `aws` section: @@ -104,7 +104,7 @@ aws: secrets_validate: True ``` -To enable validation for a single scan, use Prowler CLI: +To enable validation for a single scan (any provider), use Prowler CLI: ``` prowler aws --scan-secrets-validate diff --git a/docs/user-guide/cli/tutorials/pentesting.mdx b/docs/user-guide/cli/tutorials/pentesting.mdx index a0aa628b536..35d5b72be77 100644 --- a/docs/user-guide/cli/tutorials/pentesting.mdx +++ b/docs/user-guide/cli/tutorials/pentesting.mdx @@ -8,16 +8,29 @@ Prowler has some checks that analyse pentesting risks (Secrets, Internet Exposed Prowler scans for secrets stored in plaintext within the audited environment using [Kingfisher](https://github.com/mongodb/kingfisher), an open-source secret-scanning engine. By default these scans run fully offline, so no data leaves the audited environment. Discovered secrets can optionally be validated against the provider APIs to confirm whether they are live — see [Validating Discovered Secrets](/user-guide/cli/tutorials/configuration_file#validating-discovered-secrets). -The actual checks that have this functionality are the following: +The checks with this functionality are the following. 
+ +AWS: - autoscaling\_find\_secrets\_ec2\_launch\_configuration - awslambda\_function\_no\_secrets\_in\_code - awslambda\_function\_no\_secrets\_in\_variables - cloudformation\_stack\_outputs\_find\_secrets +- cloudwatch\_log\_group\_no\_secrets\_in\_logs +- codebuild\_project\_no\_secrets\_in\_variables - ec2\_instance\_secrets\_user\_data - ec2\_launch\_template\_no\_secrets - ecs\_task\_definitions\_no\_environment\_secrets +- glue\_etl\_jobs\_no\_secrets\_in\_arguments - ssm\_document\_secrets +- stepfunctions\_statemachine\_no\_secrets\_in\_definition + +OpenStack: + +- compute\_instance\_metadata\_sensitive\_data +- blockstorage\_volume\_metadata\_sensitive\_data +- blockstorage\_snapshot\_metadata\_sensitive\_data +- objectstorage\_container\_metadata\_sensitive\_data To execute the secret-scanning checks, run the following command: From 21e07b12da1388fe2b9d6951d3f14e1b68f66ee7 Mon Sep 17 00:00:00 2001 From: Daniel Barranquero Date: Thu, 25 Jun 2026 11:27:38 +0200 Subject: [PATCH 09/20] feat(sdk): add batched secret scanning to amortize kingfisher subprocess cost --- prowler/lib/utils/utils.py | 228 +++++++++++++++++++++++++++------- tests/lib/utils/utils_test.py | 44 +++++++ 2 files changed, 226 insertions(+), 46 deletions(-) diff --git a/prowler/lib/utils/utils.py b/prowler/lib/utils/utils.py index 3f4159a61e7..3963ef9f1c0 100644 --- a/prowler/lib/utils/utils.py +++ b/prowler/lib/utils/utils.py @@ -9,6 +9,7 @@ pass import re +import shutil import subprocess import sys import tempfile @@ -37,6 +38,11 @@ # 205 (validated findings). _kingfisher_success_exit_codes = (0, 200, 205) +# Number of payloads scanned per Kingfisher invocation in batch mode. Bounds +# peak temp-disk and memory while still amortizing the per-process spawn cost +# across many fragments (see detect_secrets_scan_batch). 
+default_secrets_batch_chunk_size = 500 + @lru_cache(maxsize=1) def get_kingfisher_binary() -> str: @@ -46,6 +52,60 @@ def get_kingfisher_binary() -> str: return get_binary_path() +def _build_kingfisher_command( + scan_paths: list, + output_path: str, + confidence: str, + validate: bool, + no_dedup: bool = False, +) -> list: + """Build the Kingfisher ``scan`` command shared by single and batch scans.""" + command = [ + get_kingfisher_binary(), + "scan", + *scan_paths, + "--format", + "json", + "--output", + output_path, + "--no-update-check", + "--confidence", + confidence, + ] + if validate: + # Live-validate discovered secrets against provider APIs. Use + # conservative defaults (short timeout, no retries) to limit the blast + # radius of the outbound calls. + command += ["--validation-timeout", "5", "--validation-retries", "0"] + else: + command.append("--no-validate") + if no_dedup: + # Report every occurrence (one per file) so batched results match + # scanning each payload individually. + command.append("--no-dedup") + return command + + +def _finding_to_dict(entry: dict, fallback_filename: str) -> dict: + """Convert a Kingfisher finding entry into Prowler's finding dict shape.""" + rule = entry.get("rule", {}) + finding = entry.get("finding", {}) + snippet = finding.get("snippet", "") or "" + return { + "filename": finding.get("path", fallback_filename), + "line_number": finding.get("line"), + "type": rule.get("name"), + # Non-security identifier for the matched secret (matches the + # detect-secrets output shape); not used for security. 
+ "hashed_secret": ( + sha1(snippet.encode(), usedforsecurity=False).hexdigest() + if snippet + else None + ), + "is_verified": finding.get("validation", {}).get("status") == "Active", + } + + def open_file(input_file: str, mode: str = "r") -> TextIOWrapper: """open_file returns a handler to the file using the specified mode.""" try: @@ -167,30 +227,9 @@ def detect_secrets_scan( temp_output_file = tempfile.NamedTemporaryFile(delete=False, suffix=".json") temp_output_file.close() - command = [ - get_kingfisher_binary(), - "scan", - scan_path, - "--format", - "json", - "--output", - temp_output_file.name, - "--no-update-check", - "--confidence", - confidence, - ] - if validate: - # Live-validate discovered secrets against provider APIs. Use - # conservative defaults (short timeout, no retries) to limit the - # blast radius of the outbound calls. - command += [ - "--validation-timeout", - "5", - "--validation-retries", - "0", - ] - else: - command.append("--no-validate") + command = _build_kingfisher_command( + [scan_path], temp_output_file.name, confidence, validate + ) process = subprocess.run(command, capture_output=True, text=True) if process.returncode not in _kingfisher_success_exit_codes: logger.error( @@ -212,32 +251,12 @@ def detect_secrets_scan( findings = [] for entry in kingfisher_output.get("findings", []): - rule = entry.get("rule", {}) - finding = entry.get("finding", {}) - line_number = finding.get("line") - + line_number = entry.get("finding", {}).get("line") if excluded_secrets and line_number and line_number <= len(source_lines): line_text = source_lines[line_number - 1] if any(re.search(pattern, line_text) for pattern in excluded_secrets): continue - - snippet = finding.get("snippet", "") or "" - findings.append( - { - "filename": finding.get("path", scan_path), - "line_number": line_number, - "type": rule.get("name"), - # Non-security identifier for the matched secret (matches - # the detect-secrets output shape); not used for security. 
- "hashed_secret": ( - sha1(snippet.encode(), usedforsecurity=False).hexdigest() - if snippet - else None - ), - "is_verified": finding.get("validation", {}).get("status") - == "Active", - } - ) + findings.append(_finding_to_dict(entry, scan_path)) return findings or None except Exception as e: @@ -249,6 +268,123 @@ def detect_secrets_scan( os.remove(temp_file.name) +def _scan_batch_chunk( + chunk: list, + excluded_secrets: list, + confidence: str, + validate: bool, + results: dict, +) -> None: + """Scan one chunk of ``(key, data)`` payloads in a single Kingfisher call. + + Writes each payload to its own file in a temp directory, scans the whole + directory once (``--no-dedup`` so per-file results match individual scans), + maps findings back to their key by file path, and appends them to + ``results``. The temp directory is always removed. + """ + if not chunk: + return + tmp_dir = tempfile.mkdtemp() + temp_output_file = None + try: + index_to_key = {} + for index, (key, data) in enumerate(chunk): + content = data if data.endswith("\n") else data + "\n" + name = str(index) + with open(os.path.join(tmp_dir, name), "wb") as fh: + fh.write(bytes(content, encoding="raw_unicode_escape")) + index_to_key[name] = key + + temp_output_file = tempfile.NamedTemporaryFile(delete=False, suffix=".json") + temp_output_file.close() + command = _build_kingfisher_command( + [tmp_dir], temp_output_file.name, confidence, validate, no_dedup=True + ) + process = subprocess.run(command, capture_output=True, text=True) + if process.returncode not in _kingfisher_success_exit_codes: + logger.error( + f"Error scanning for secrets: Kingfisher exited with code " + f"{process.returncode}: {process.stderr.strip()[:500]}" + ) + return + + with open(temp_output_file.name, encoding=encoding_format_utf_8) as f: + output = f.read() + kingfisher_output = json.loads(output) if output.strip() else {} + + source_lines_cache = {} + for entry in kingfisher_output.get("findings", []): + finding = 
entry.get("finding", {}) + name = os.path.basename(finding.get("path", "")) + key = index_to_key.get(name) + if key is None: + continue + line_number = finding.get("line") + if excluded_secrets and line_number: + if name not in source_lines_cache: + with open( + os.path.join(tmp_dir, name), + encoding=encoding_format_utf_8, + errors="replace", + ) as f: + source_lines_cache[name] = f.read().splitlines() + lines = source_lines_cache[name] + if line_number <= len(lines) and any( + re.search(pattern, lines[line_number - 1]) + for pattern in excluded_secrets + ): + continue + results.setdefault(key, []).append(_finding_to_dict(entry, name)) + except Exception as e: + logger.error(f"Error scanning for secrets: {e}") + finally: + if temp_output_file and os.path.exists(temp_output_file.name): + os.remove(temp_output_file.name) + shutil.rmtree(tmp_dir, ignore_errors=True) + + +def detect_secrets_scan_batch( + payloads, + excluded_secrets: list[str] = None, + confidence: str = default_secrets_confidence, + validate: bool = False, + chunk_size: int = default_secrets_batch_chunk_size, +) -> dict: + """Scan many in-memory payloads in chunked, single Kingfisher invocations. + + Each payload is written to its own file and scanned with ``--no-dedup`` so + per-payload results match calling ``detect_secrets_scan`` on each payload + individually. Payloads are processed in chunks (writing each to disk and + releasing it as it is consumed) to bound peak temp-disk and memory use while + amortizing the per-process spawn cost across many fragments. This is the + batched equivalent of looping ``detect_secrets_scan`` per fragment. + + Args: + payloads: a mapping ``{key: data}`` or any iterable of ``(key, data)`` + pairs. ``key`` is any hashable the caller uses to map findings back + to its source (e.g. a variable name or a ``(resource, stream)``). + excluded_secrets (list): regex patterns; a finding whose source line + matches one is excluded. 
+ confidence (str): minimum Kingfisher confidence ("low"/"medium"/"high"). + validate (bool): live-validate discovered secrets (outbound calls). + chunk_size (int): payloads scanned per Kingfisher invocation. + Returns: + dict mapping each key that produced findings to its list of finding + dicts (same shape as ``detect_secrets_scan``). Keys with no findings are + omitted. + """ + items = payloads.items() if hasattr(payloads, "items") else payloads + results = {} + chunk = [] + for key, data in items: + chunk.append((key, data)) + if len(chunk) >= chunk_size: + _scan_batch_chunk(chunk, excluded_secrets, confidence, validate, results) + chunk = [] + _scan_batch_chunk(chunk, excluded_secrets, confidence, validate, results) + return results + + def annotate_verified_secrets(report, secrets: list) -> None: """Escalate and annotate a finding when any of its secrets is confirmed live. diff --git a/tests/lib/utils/utils_test.py b/tests/lib/utils/utils_test.py index 5e093eaa975..88724db9abf 100644 --- a/tests/lib/utils/utils_test.py +++ b/tests/lib/utils/utils_test.py @@ -8,6 +8,7 @@ from prowler.lib.utils.utils import ( detect_secrets_scan, + detect_secrets_scan_batch, file_exists, get_file_permissions, hash_sha512, @@ -216,6 +217,49 @@ def test_detect_secrets_scan_validate_enabled(self): assert "--validation-retries" in command +class Test_detect_secrets_scan_batch: + def test_batch_returns_findings_per_key(self): + results = detect_secrets_scan_batch( + { + "a": 'password = "Tr0ub4dor3xKq9vLmZ"', + "b": "just a normal config = value", + } + ) + assert "a" in results + assert results["a"][0]["type"] == "Generic Password" + # keys without findings are omitted + assert "b" not in results + + def test_batch_no_dedup_reports_identical_secret_in_each_key(self): + # The same secret in two payloads must be reported for both (matches + # scanning each payload individually). 
+ secret = "token = eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U" + results = detect_secrets_scan_batch({"a": secret, "b": secret}) + assert "a" in results + assert "b" in results + + def test_batch_excluded_secrets_filters(self): + results = detect_secrets_scan_batch( + {"a": 'DB_ALLOW_EMPTY_PASSWORD = "Tr0ub4dor3xKq9vLmZ"'}, + excluded_secrets=[".*ALLOW_EMPTY_PASSWORD.*"], + ) + assert results == {} + + def test_batch_chunking_maps_all_keys(self): + payloads = {f"k{i}": f'password = "S3cr3tV4lu3xy{i}z"' for i in range(5)} + results = detect_secrets_scan_batch(payloads, chunk_size=2) + assert sorted(results.keys()) == ["k0", "k1", "k2", "k3", "k4"] + + def test_batch_empty_payloads(self): + assert detect_secrets_scan_batch({}) == {} + + def test_batch_accepts_iterable_of_pairs(self): + results = detect_secrets_scan_batch( + iter([("x", 'password = "Tr0ub4dor3xKq9vLmZ"')]) + ) + assert "x" in results + + class Test_hash_sha512: def test_hash_sha512(self): assert hash_sha512("test") == "ee26b0dd4" From 7bd6884b45ecbe38d40bbfa16291d289cdbc7e7a Mon Sep 17 00:00:00 2001 From: Daniel Barranquero Date: Thu, 25 Jun 2026 11:27:42 +0200 Subject: [PATCH 10/20] perf(sdk): batch secret scanning across cloudwatch, lambda and ecs resources --- .../awslambda_function_no_secrets_in_code.py | 120 +++++++++------- ...lambda_function_no_secrets_in_variables.py | 37 +++-- ...cloudwatch_log_group_no_secrets_in_logs.py | 129 ++++++++++-------- ...task_definitions_no_environment_secrets.py | 49 +++++-- 4 files changed, 200 insertions(+), 135 deletions(-) diff --git a/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_code/awslambda_function_no_secrets_in_code.py b/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_code/awslambda_function_no_secrets_in_code.py index fc8795c6884..21aea8dd231 100644 --- 
a/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_code/awslambda_function_no_secrets_in_code.py +++ b/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_code/awslambda_function_no_secrets_in_code.py @@ -1,68 +1,86 @@ import os import tempfile +from collections import defaultdict from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.aws.services.awslambda.awslambda_client import awslambda_client class awslambda_function_no_secrets_in_code(Check): def execute(self): findings = [] - if awslambda_client.functions: - secrets_ignore_patterns = awslambda_client.audit_config.get( - "secrets_ignore_patterns", [] - ) + if not awslambda_client.functions: + return findings + + secrets_ignore_patterns = awslambda_client.audit_config.get( + "secrets_ignore_patterns", [] + ) + validate = awslambda_client.audit_config.get("secrets_validate", False) + + # Scan the top-level files of every function's package in batched + # Kingfisher invocations instead of one subprocess per file per function. + # Each package is extracted one at a time and its top-level files are + # read (byte-faithfully via latin-1) before the extraction is released, + # so only a single package is on disk at a time. Findings are keyed by + # (function index, file name) so they can be grouped back per function. 
+ functions_with_code = [] + + def code_payloads(): for function, function_code in awslambda_client._get_function_code(): - if function_code: - report = Check_Report_AWS( - metadata=self.metadata(), resource=function - ) + if not function_code: + continue + index = len(functions_with_code) + functions_with_code.append(function) + with tempfile.TemporaryDirectory() as tmp_dir_name: + function_code.code_zip.extractall(tmp_dir_name) + for file_name in next(os.walk(tmp_dir_name))[2]: + try: + with open( + os.path.join(tmp_dir_name, file_name), "rb" + ) as code_file: + content = code_file.read().decode("latin-1") + except Exception: + continue + yield (index, file_name), content + + batch_results = detect_secrets_scan_batch( + code_payloads(), + excluded_secrets=secrets_ignore_patterns, + validate=validate, + ) + + findings_by_function = defaultdict(dict) + for (index, file_name), file_findings in batch_results.items(): + findings_by_function[index][file_name] = file_findings + + for index, function in enumerate(functions_with_code): + report = Check_Report_AWS(metadata=self.metadata(), resource=function) + report.status = "PASS" + report.status_extended = ( + f"No secrets found in Lambda function {function.name} code." + ) - report.status = "PASS" - report.status_extended = ( - f"No secrets found in Lambda function {function.name} code." 
+ files_with_secrets = findings_by_function.get(index) + if files_with_secrets: + all_secrets = [] + secrets_findings = [] + for file_name, file_findings in files_with_secrets.items(): + all_secrets.extend(file_findings) + secrets_string = ", ".join( + f"{secret['type']} on line {secret['line_number']}" + for secret in file_findings ) - with tempfile.TemporaryDirectory() as tmp_dir_name: - function_code.code_zip.extractall(tmp_dir_name) - # List all files - files_in_zip = next(os.walk(tmp_dir_name))[2] - secrets_findings = [] - all_secrets = [] - for file in files_in_zip: - detect_secrets_output = detect_secrets_scan( - file=f"{tmp_dir_name}/{file}", - excluded_secrets=secrets_ignore_patterns, - validate=awslambda_client.audit_config.get( - "secrets_validate", False - ), - ) - if detect_secrets_output: - all_secrets.extend(detect_secrets_output) - for ( - secret - ) in ( - detect_secrets_output - ): # Appears that only 1 file is being scanned at a time, so could rework this - output_file_name = secret["filename"].replace( - f"{tmp_dir_name}/", "" - ) - secrets_string = ", ".join( - [ - f"{secret['type']} on line {secret['line_number']}" - for secret in detect_secrets_output - ] - ) - secrets_findings.append( - f"{output_file_name}: {secrets_string}" - ) + secrets_findings.append(f"{file_name}: {secrets_string}") - if secrets_findings: - final_output_string = "; ".join(secrets_findings) - report.status = "FAIL" - report.status_extended = f"Potential {'secrets' if len(secrets_findings) > 1 else 'secret'} found in Lambda function {function.name} code -> {final_output_string}." - annotate_verified_secrets(report, all_secrets) + final_output_string = "; ".join(secrets_findings) + report.status = "FAIL" + report.status_extended = f"Potential {'secrets' if len(secrets_findings) > 1 else 'secret'} found in Lambda function {function.name} code -> {final_output_string}." 
+ annotate_verified_secrets(report, all_secrets) - findings.append(report) + findings.append(report) return findings diff --git a/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables.py b/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables.py index 280749397d0..bfa4f07dca1 100644 --- a/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables.py +++ b/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables.py @@ -1,7 +1,10 @@ import json from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.aws.services.awslambda.awslambda_client import awslambda_client @@ -11,7 +14,25 @@ def execute(self): secrets_ignore_patterns = awslambda_client.audit_config.get( "secrets_ignore_patterns", [] ) - for function in awslambda_client.functions.values(): + validate = awslambda_client.audit_config.get("secrets_validate", False) + functions = list(awslambda_client.functions.values()) + + # Scan every function's environment variables in batched Kingfisher + # invocations instead of one subprocess per function. Payloads are + # yielded lazily so only a chunk is held/written at a time, which matters + # for accounts with very large numbers of Lambda functions. 
+ def environment_payloads(): + for index, function in enumerate(functions): + if function.environment: + yield index, json.dumps(function.environment, indent=2) + + batch_results = detect_secrets_scan_batch( + environment_payloads(), + excluded_secrets=secrets_ignore_patterns, + validate=validate, + ) + + for index, function in enumerate(functions): report = Check_Report_AWS(metadata=self.metadata(), resource=function) report.status = "PASS" @@ -20,17 +41,9 @@ def execute(self): ) if function.environment: - detect_secrets_output = detect_secrets_scan( - data=json.dumps(function.environment, indent=2), - excluded_secrets=secrets_ignore_patterns, - validate=awslambda_client.audit_config.get( - "secrets_validate", False - ), - ) - original_env_vars = [] - for name, value in function.environment.items(): - original_env_vars.append(name) + detect_secrets_output = batch_results.get(index) if detect_secrets_output: + original_env_vars = list(function.environment.keys()) secrets_string = ", ".join( [ f"{secret['type']} in variable {original_env_vars[secret['line_number'] - 2]}" diff --git a/prowler/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs.py b/prowler/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs.py index 2b92035aa4a..2976d39a6cc 100644 --- a/prowler/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs.py +++ b/prowler/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs.py @@ -1,7 +1,11 @@ from json import dumps, loads from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan, + detect_secrets_scan_batch, +) from 
prowler.providers.aws.services.cloudwatch.cloudwatch_service import ( convert_to_cloudwatch_timestamp_format, ) @@ -15,6 +19,29 @@ def execute(self): secrets_ignore_patterns = logs_client.audit_config.get( "secrets_ignore_patterns", [] ) + validate = logs_client.audit_config.get("secrets_validate", False) + + # Scan every (log group, log stream) in batched Kingfisher + # invocations instead of one subprocess per stream. The payloads are + # yielded lazily so only a chunk's worth is ever written to disk / + # held in memory at a time, which matters for accounts with very + # large numbers of log groups/streams. + def stream_payloads(): + for log_group in logs_client.log_groups.values(): + if not log_group.log_streams: + continue + for log_stream_name, events in log_group.log_streams.items(): + yield ( + (log_group.name, log_stream_name), + "\n".join(dumps(event["message"]) for event in events), + ) + + batch_results = detect_secrets_scan_batch( + stream_payloads(), + excluded_secrets=secrets_ignore_patterns, + validate=validate, + ) + for log_group in logs_client.log_groups.values(): report = Check_Report_AWS(metadata=self.metadata(), resource=log_group) report.status = "PASS" @@ -25,66 +52,54 @@ def execute(self): all_secrets = [] if log_group.log_streams: for log_stream_name in log_group.log_streams: + log_stream_secrets_output = batch_results.get( + (log_group.name, log_stream_name) + ) + if not log_stream_secrets_output: + continue log_stream_secrets = {} - log_stream_data = "\n".join( - [ - dumps(event["message"]) - for event in log_group.log_streams[log_stream_name] + all_secrets.extend(log_stream_secrets_output) + for secret in log_stream_secrets_output: + flagged_event = log_group.log_streams[log_stream_name][ + secret["line_number"] - 1 ] - ) - log_stream_secrets_output = detect_secrets_scan( - data=log_stream_data, - excluded_secrets=secrets_ignore_patterns, - validate=logs_client.audit_config.get( - "secrets_validate", False - ), - ) - - if 
log_stream_secrets_output: - all_secrets.extend(log_stream_secrets_output) - for secret in log_stream_secrets_output: - flagged_event = log_group.log_streams[log_stream_name][ - secret["line_number"] - 1 - ] - cloudwatch_timestamp = ( - convert_to_cloudwatch_timestamp_format( - flagged_event["timestamp"] - ) + cloudwatch_timestamp = ( + convert_to_cloudwatch_timestamp_format( + flagged_event["timestamp"] ) - if ( - cloudwatch_timestamp - not in log_stream_secrets.keys() - ): - log_stream_secrets[cloudwatch_timestamp] = ( - SecretsDict() - ) + ) + if cloudwatch_timestamp not in log_stream_secrets.keys(): + log_stream_secrets[cloudwatch_timestamp] = SecretsDict() - try: - log_event_data = dumps( - loads(flagged_event["message"]), indent=2 - ) - except Exception: - log_event_data = dumps( - flagged_event["message"], indent=2 - ) - if len(log_event_data.split("\n")) > 1: - # Can get more informative output if there is more than 1 line. - # Will rescan just this event to get the type of secret and the line number - event_detect_secrets_output = detect_secrets_scan( - data=log_event_data, - validate=False, - ) - if event_detect_secrets_output: - for secret in event_detect_secrets_output: - log_stream_secrets[ - cloudwatch_timestamp - ].add_secret( - secret["line_number"], secret["type"] - ) - else: - log_stream_secrets[cloudwatch_timestamp].add_secret( - 1, secret["type"] - ) + try: + log_event_data = dumps( + loads(flagged_event["message"]), indent=2 + ) + except Exception: + log_event_data = dumps( + flagged_event["message"], indent=2 + ) + if len(log_event_data.split("\n")) > 1: + # Can get more informative output if there is more than 1 line. + # Will rescan just this event to get the type of secret and the line number. + # Validation is disabled here: this rescan only resolves line numbers + # for display and must not re-authenticate the secret. 
+ event_detect_secrets_output = detect_secrets_scan( + data=log_event_data, + validate=False, + ) + if event_detect_secrets_output: + for event_secret in event_detect_secrets_output: + log_stream_secrets[ + cloudwatch_timestamp + ].add_secret( + event_secret["line_number"], + event_secret["type"], + ) + else: + log_stream_secrets[cloudwatch_timestamp].add_secret( + 1, secret["type"] + ) if log_stream_secrets: secrets_string = "; ".join( [ diff --git a/prowler/providers/aws/services/ecs/ecs_task_definitions_no_environment_secrets/ecs_task_definitions_no_environment_secrets.py b/prowler/providers/aws/services/ecs/ecs_task_definitions_no_environment_secrets/ecs_task_definitions_no_environment_secrets.py index 283d5decd6d..dc8a0991f73 100644 --- a/prowler/providers/aws/services/ecs/ecs_task_definitions_no_environment_secrets/ecs_task_definitions_no_environment_secrets.py +++ b/prowler/providers/aws/services/ecs/ecs_task_definitions_no_environment_secrets/ecs_task_definitions_no_environment_secrets.py @@ -1,7 +1,10 @@ from json import dumps from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.aws.services.ecs.ecs_client import ecs_client @@ -11,7 +14,31 @@ def execute(self): secrets_ignore_patterns = ecs_client.audit_config.get( "secrets_ignore_patterns", [] ) - for task_definition in ecs_client.task_definitions.values(): + validate = ecs_client.audit_config.get("secrets_validate", False) + task_definitions = list(ecs_client.task_definitions.values()) + + # Scan every (task definition, container) environment in batched + # Kingfisher invocations instead of one subprocess per container. + # Payloads are yielded lazily so only a chunk is held/written at a time. 
+ def environment_payloads(): + for td_index, task_definition in enumerate(task_definitions): + for c_index, container in enumerate( + task_definition.container_definitions + ): + if container.environment: + dump_env_vars = { + env_var.name: env_var.value + for env_var in container.environment + } + yield (td_index, c_index), dumps(dump_env_vars, indent=2) + + batch_results = detect_secrets_scan_batch( + environment_payloads(), + excluded_secrets=secrets_ignore_patterns, + validate=validate, + ) + + for td_index, task_definition in enumerate(task_definitions): report = Check_Report_AWS( metadata=self.metadata(), resource=task_definition ) @@ -20,22 +47,14 @@ def execute(self): extended_status_parts = [] all_secrets = [] - for container in task_definition.container_definitions: + for c_index, container in enumerate(task_definition.container_definitions): container_secrets_found = [] if container.environment: - dump_env_vars = {} - original_env_vars = [] - for env_var in container.environment: - dump_env_vars.update({env_var.name: env_var.value}) - original_env_vars.append(env_var.name) - - env_data = dumps(dump_env_vars, indent=2) - detect_secrets_output = detect_secrets_scan( - data=env_data, - excluded_secrets=secrets_ignore_patterns, - validate=ecs_client.audit_config.get("secrets_validate", False), - ) + original_env_vars = [ + env_var.name for env_var in container.environment + ] + detect_secrets_output = batch_results.get((td_index, c_index)) if detect_secrets_output: all_secrets.extend(detect_secrets_output) secrets_string = ", ".join( From 741cc94235bd147778908880fb37ddcaa8cc62f3 Mon Sep 17 00:00:00 2001 From: Daniel Barranquero Date: Thu, 25 Jun 2026 12:31:24 +0200 Subject: [PATCH 11/20] refactor(sdk): batch secret scanning across all remaining secret checks --- ...g_find_secrets_ec2_launch_configuration.py | 43 ++++-- ...oudformation_stack_outputs_find_secrets.py | 36 ++--- ...debuild_project_no_secrets_in_variables.py | 54 +++++--- 
.../ec2_instance_secrets_user_data.py | 108 ++++++++------- .../ec2_launch_template_no_secrets.py | 73 ++++++----- .../glue_etl_jobs_no_secrets_in_arguments.py | 123 ++++++++++-------- .../ssm_document_secrets.py | 27 +++- ...s_statemachine_no_secrets_in_definition.py | 30 +++-- ...torage_snapshot_metadata_sensitive_data.py | 39 +++--- ...kstorage_volume_metadata_sensitive_data.py | 39 +++--- ...ompute_instance_metadata_sensitive_data.py | 37 +++--- ...orage_container_metadata_sensitive_data.py | 39 +++--- .../ec2_instance_secrets_user_data_test.py | 22 ++-- 13 files changed, 391 insertions(+), 279 deletions(-) diff --git a/prowler/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration.py b/prowler/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration.py index e4f7b15d855..3956702df42 100644 --- a/prowler/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration.py +++ b/prowler/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration.py @@ -4,7 +4,10 @@ from prowler.config.config import encoding_format_utf_8 from prowler.lib.check.models import Check, Check_Report_AWS from prowler.lib.logger import logger -from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.aws.services.autoscaling.autoscaling_client import ( autoscaling_client, ) @@ -16,13 +19,19 @@ def execute(self): secrets_ignore_patterns = autoscaling_client.audit_config.get( "secrets_ignore_patterns", [] ) - for ( - configuration_arn, - configuration, - ) in autoscaling_client.launch_configurations.items(): - report = 
Check_Report_AWS(metadata=self.metadata(), resource=configuration) + validate = autoscaling_client.audit_config.get("secrets_validate", False) + configurations = list(autoscaling_client.launch_configurations.values()) - if configuration.user_data: + # Collect the decoded User Data of each launch configuration and scan it + # all in batched Kingfisher invocations instead of one subprocess each. + # Configurations whose User Data cannot be decoded are skipped (no report), + # matching the original per-resource behavior. + skipped = set() + + def payloads(): + for index, configuration in enumerate(configurations): + if not configuration.user_data: + continue user_data = b64decode(configuration.user_data) try: if user_data[0:2] == b"\x1f\x8b": # GZIP magic number @@ -35,21 +44,27 @@ def execute(self): logger.warning( f"{configuration.region} -- Unable to decode user data in autoscaling launch configuration {configuration.name}: {error}" ) + skipped.add(index) continue except Exception as error: logger.error( f"{configuration.region} -- {error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}" ) + skipped.add(index) continue + yield index, user_data - has_secrets = detect_secrets_scan( - data=user_data, - excluded_secrets=secrets_ignore_patterns, - validate=autoscaling_client.audit_config.get( - "secrets_validate", False - ), - ) + batch_results = detect_secrets_scan_batch( + payloads(), excluded_secrets=secrets_ignore_patterns, validate=validate + ) + + for index, configuration in enumerate(configurations): + if index in skipped: + continue + report = Check_Report_AWS(metadata=self.metadata(), resource=configuration) + if configuration.user_data: + has_secrets = batch_results.get(index) if has_secrets: report.status = "FAIL" report.status_extended = f"Potential secret found in autoscaling {configuration.name} User Data." 
diff --git a/prowler/providers/aws/services/cloudformation/cloudformation_stack_outputs_find_secrets/cloudformation_stack_outputs_find_secrets.py b/prowler/providers/aws/services/cloudformation/cloudformation_stack_outputs_find_secrets/cloudformation_stack_outputs_find_secrets.py index 71a835e2c20..d545c8cbe74 100644 --- a/prowler/providers/aws/services/cloudformation/cloudformation_stack_outputs_find_secrets/cloudformation_stack_outputs_find_secrets.py +++ b/prowler/providers/aws/services/cloudformation/cloudformation_stack_outputs_find_secrets/cloudformation_stack_outputs_find_secrets.py @@ -1,5 +1,8 @@ from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.aws.services.cloudformation.cloudformation_client import ( cloudformation_client, ) @@ -14,26 +17,28 @@ def execute(self): secrets_ignore_patterns = cloudformation_client.audit_config.get( "secrets_ignore_patterns", [] ) - for stack in cloudformation_client.stacks: + validate = cloudformation_client.audit_config.get("secrets_validate", False) + stacks = list(cloudformation_client.stacks) + + # Collect one payload per stack (its Outputs) and scan them all in + # batched Kingfisher invocations instead of one subprocess per stack. + def payloads(): + for index, stack in enumerate(stacks): + if stack.outputs: + yield index, "".join(f"{output}\n" for output in stack.outputs) + + batch_results = detect_secrets_scan_batch( + payloads(), excluded_secrets=secrets_ignore_patterns, validate=validate + ) + + for index, stack in enumerate(stacks): report = Check_Report_AWS(metadata=self.metadata(), resource=stack) report.status = "PASS" report.status_extended = ( f"No secrets found in CloudFormation Stack {stack.name} Outputs." 
) if stack.outputs: - data = "" - # Store the CloudFormation Stack Outputs into a file - for output in stack.outputs: - data += f"{output}\n" - - detect_secrets_output = detect_secrets_scan( - data=data, - excluded_secrets=secrets_ignore_patterns, - validate=cloudformation_client.audit_config.get( - "secrets_validate", False - ), - ) - # If secrets are found, update the report status + detect_secrets_output = batch_results.get(index) if detect_secrets_output: secrets_string = ", ".join( [ @@ -44,7 +49,6 @@ def execute(self): report.status = "FAIL" report.status_extended = f"Potential secret found in CloudFormation Stack {stack.name} Outputs -> {secrets_string}." annotate_verified_secrets(report, detect_secrets_output) - else: report.status = "PASS" report.status_extended = ( diff --git a/prowler/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables.py b/prowler/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables.py index 5416dd3fd08..1114ce76ae7 100644 --- a/prowler/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables.py +++ b/prowler/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables.py @@ -1,7 +1,10 @@ import json from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.aws.services.codebuild.codebuild_client import codebuild_client @@ -14,7 +17,29 @@ def execute(self): secrets_ignore_patterns = codebuild_client.audit_config.get( "secrets_ignore_patterns", [] ) - for project in codebuild_client.projects.values(): + validate = codebuild_client.audit_config.get("secrets_validate", False) + projects = 
list(codebuild_client.projects.values()) + + # Collect every scannable plaintext variable across all projects and scan + # them in batched Kingfisher invocations instead of one subprocess per + # variable. Findings are keyed by (project index, variable index). + def payloads(): + for project_index, project in enumerate(projects): + if project.environment_variables: + for var_index, env_var in enumerate(project.environment_variables): + if ( + env_var.type == "PLAINTEXT" + and env_var.name not in sensitive_vars_excluded + ): + yield (project_index, var_index), json.dumps( + {env_var.name: env_var.value} + ) + + batch_results = detect_secrets_scan_batch( + payloads(), excluded_secrets=secrets_ignore_patterns, validate=validate + ) + + for project_index, project in enumerate(projects): report = Check_Report_AWS(metadata=self.metadata(), resource=project) report.status = "PASS" report.status_extended = f"CodeBuild project {project.name} does not have sensitive environment plaintext credentials." 
@@ -22,25 +47,18 @@ def execute(self): all_secrets = [] if project.environment_variables: - for env_var in project.environment_variables: - if ( - env_var.type == "PLAINTEXT" - and env_var.name not in sensitive_vars_excluded - ): - detect_secrets_output = detect_secrets_scan( - data=json.dumps({env_var.name: env_var.value}), - excluded_secrets=secrets_ignore_patterns, - validate=codebuild_client.audit_config.get( - "secrets_validate", False - ), - ) - if detect_secrets_output: - all_secrets.extend(detect_secrets_output) - secrets_info = [ + for var_index, env_var in enumerate(project.environment_variables): + detect_secrets_output = batch_results.get( + (project_index, var_index) + ) + if detect_secrets_output: + all_secrets.extend(detect_secrets_output) + secrets_found.extend( + [ f"{secret['type']} in variable {env_var.name}" for secret in detect_secrets_output ] - secrets_found.extend(secrets_info) + ) if secrets_found: report.status = "FAIL" diff --git a/prowler/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data.py b/prowler/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data.py index f737bfa923c..4c817d86a86 100644 --- a/prowler/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data.py +++ b/prowler/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data.py @@ -4,7 +4,10 @@ from prowler.config.config import encoding_format_utf_8 from prowler.lib.check.models import Check, Check_Report_AWS from prowler.lib.logger import logger -from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.aws.services.ec2.ec2_client import ec2_client @@ -14,53 +17,70 @@ def execute(self): secrets_ignore_patterns = ec2_client.audit_config.get( "secrets_ignore_patterns", [] ) - for instance in 
ec2_client.instances: - if instance.state != "terminated": - report = Check_Report_AWS(metadata=self.metadata(), resource=instance) - if instance.user_data: - user_data = b64decode(instance.user_data) - try: - if user_data[0:2] == b"\x1f\x8b": # GZIP magic number - user_data = zlib.decompress( - user_data, zlib.MAX_WBITS | 32 - ).decode(encoding_format_utf_8) - else: - user_data = user_data.decode(encoding_format_utf_8) - except UnicodeDecodeError as error: - logger.warning( - f"{instance.region} -- Unable to decode user data in EC2 instance {instance.id}: {error}" - ) - continue - except Exception as error: - logger.error( - f"{instance.region} -- {error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}" - ) - continue - detect_secrets_output = detect_secrets_scan( - data=user_data, - excluded_secrets=secrets_ignore_patterns, - validate=ec2_client.audit_config.get("secrets_validate", False), - ) - if detect_secrets_output: - secrets_string = ", ".join( - [ - f"{secret['type']} on line {secret['line_number']}" - for secret in detect_secrets_output - ] - ) - report.status = "FAIL" - report.status_extended = f"Potential secret found in EC2 instance {instance.id} User Data -> {secrets_string}." - annotate_verified_secrets(report, detect_secrets_output) + validate = ec2_client.audit_config.get("secrets_validate", False) + instances = list(ec2_client.instances) + + # Collect the decoded User Data of each non-terminated instance and scan + # it all in batched Kingfisher invocations instead of one subprocess each. + # Instances whose User Data cannot be decoded are skipped (no report), + # matching the original per-resource behavior. 
+ skipped = set() + def payloads(): + for index, instance in enumerate(instances): + if instance.state == "terminated" or not instance.user_data: + continue + user_data = b64decode(instance.user_data) + try: + if user_data[0:2] == b"\x1f\x8b": # GZIP magic number + user_data = zlib.decompress( + user_data, zlib.MAX_WBITS | 32 + ).decode(encoding_format_utf_8) else: - report.status = "PASS" - report.status_extended = ( - f"No secrets found in EC2 instance {instance.id} User Data." - ) + user_data = user_data.decode(encoding_format_utf_8) + except UnicodeDecodeError as error: + logger.warning( + f"{instance.region} -- Unable to decode user data in EC2 instance {instance.id}: {error}" + ) + skipped.add(index) + continue + except Exception as error: + logger.error( + f"{instance.region} -- {error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}" + ) + skipped.add(index) + continue + yield index, user_data + + batch_results = detect_secrets_scan_batch( + payloads(), excluded_secrets=secrets_ignore_patterns, validate=validate + ) + + for index, instance in enumerate(instances): + if instance.state == "terminated" or index in skipped: + continue + report = Check_Report_AWS(metadata=self.metadata(), resource=instance) + if instance.user_data: + detect_secrets_output = batch_results.get(index) + if detect_secrets_output: + secrets_string = ", ".join( + [ + f"{secret['type']} on line {secret['line_number']}" + for secret in detect_secrets_output + ] + ) + report.status = "FAIL" + report.status_extended = f"Potential secret found in EC2 instance {instance.id} User Data -> {secrets_string}." + annotate_verified_secrets(report, detect_secrets_output) else: report.status = "PASS" - report.status_extended = f"No secrets found in EC2 instance {instance.id} since User Data is empty." + report.status_extended = ( + f"No secrets found in EC2 instance {instance.id} User Data." 
+ ) + else: + report.status = "PASS" + report.status_extended = f"No secrets found in EC2 instance {instance.id} since User Data is empty." - findings.append(report) + findings.append(report) return findings diff --git a/prowler/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets.py b/prowler/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets.py index d98ba60e28a..13dc9744333 100644 --- a/prowler/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets.py +++ b/prowler/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets.py @@ -4,7 +4,10 @@ from prowler.config.config import encoding_format_utf_8 from prowler.lib.check.models import Check, Check_Report_AWS from prowler.lib.logger import logger -from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.aws.services.ec2.ec2_client import ec2_client @@ -14,41 +17,49 @@ def execute(self): secrets_ignore_patterns = ec2_client.audit_config.get( "secrets_ignore_patterns", [] ) - for template in ec2_client.launch_templates: - report = Check_Report_AWS(metadata=self.metadata(), resource=template) + validate = ec2_client.audit_config.get("secrets_validate", False) + templates = list(ec2_client.launch_templates) - versions_with_secrets = [] - all_secrets = [] + # Collect the decoded User Data of every (template, version) and scan it + # all in batched Kingfisher invocations instead of one subprocess per + # version. Versions whose User Data cannot be decoded are skipped. 
+ def payloads(): + for template_index, template in enumerate(templates): + for version_index, version in enumerate(template.versions): + if not version.template_data.user_data: + continue + user_data = b64decode(version.template_data.user_data) + try: + if user_data[0:2] == b"\x1f\x8b": # GZIP magic number + user_data = zlib.decompress( + user_data, zlib.MAX_WBITS | 32 + ).decode(encoding_format_utf_8) + else: + user_data = user_data.decode(encoding_format_utf_8) + except UnicodeDecodeError as error: + logger.warning( + f"{template.region} -- Unable to decode User Data in EC2 Launch Template {template.name} version {version.version_number}: {error}" + ) + continue + except Exception as error: + logger.error( + f"{template.region} -- {error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}" + ) + continue + yield (template_index, version_index), user_data - for version in template.versions: - if not version.template_data.user_data: - continue - user_data = b64decode(version.template_data.user_data) + batch_results = detect_secrets_scan_batch( + payloads(), excluded_secrets=secrets_ignore_patterns, validate=validate + ) - try: - if user_data[0:2] == b"\x1f\x8b": # GZIP magic number - user_data = zlib.decompress( - user_data, zlib.MAX_WBITS | 32 - ).decode(encoding_format_utf_8) - else: - user_data = user_data.decode(encoding_format_utf_8) - except UnicodeDecodeError as error: - logger.warning( - f"{template.region} -- Unable to decode User Data in EC2 Launch Template {template.name} version {version.version_number}: {error}" - ) - continue - except Exception as error: - logger.error( - f"{template.region} -- {error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}" - ) - continue + for template_index, template in enumerate(templates): + report = Check_Report_AWS(metadata=self.metadata(), resource=template) - version_secrets = detect_secrets_scan( - data=user_data, - excluded_secrets=secrets_ignore_patterns, - 
validate=ec2_client.audit_config.get("secrets_validate", False), - ) + versions_with_secrets = [] + all_secrets = [] + for version_index, version in enumerate(template.versions): + version_secrets = batch_results.get((template_index, version_index)) if version_secrets: all_secrets.extend(version_secrets) secrets_string = ", ".join( diff --git a/prowler/providers/aws/services/glue/glue_etl_jobs_no_secrets_in_arguments/glue_etl_jobs_no_secrets_in_arguments.py b/prowler/providers/aws/services/glue/glue_etl_jobs_no_secrets_in_arguments/glue_etl_jobs_no_secrets_in_arguments.py index 4b6c920cc4a..1ef8fc72d02 100644 --- a/prowler/providers/aws/services/glue/glue_etl_jobs_no_secrets_in_arguments/glue_etl_jobs_no_secrets_in_arguments.py +++ b/prowler/providers/aws/services/glue/glue_etl_jobs_no_secrets_in_arguments/glue_etl_jobs_no_secrets_in_arguments.py @@ -1,55 +1,68 @@ -import json - -from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan -from prowler.providers.aws.services.glue.glue_client import glue_client - - -class glue_etl_jobs_no_secrets_in_arguments(Check): - """Check if Glue ETL jobs have secrets in their default arguments. - - Scans the DefaultArguments of each Glue job for hardcoded credentials, - tokens, passwords, and other sensitive values that should be stored in - Secrets Manager or Parameter Store instead. - """ - - def execute(self): - findings = [] - secrets_ignore_patterns = glue_client.audit_config.get( - "secrets_ignore_patterns", [] - ) - for job in glue_client.jobs: - report = Check_Report_AWS(metadata=self.metadata(), resource=job) - report.status = "PASS" - report.status_extended = ( - f"No secrets found in Glue job {job.name} default arguments." 
- ) - - if job.arguments: - secrets_found = [] - all_secrets = [] - for arg_name, arg_value in job.arguments.items(): - detect_secrets_output = detect_secrets_scan( - data=json.dumps({arg_name: arg_value}), - excluded_secrets=secrets_ignore_patterns, - validate=glue_client.audit_config.get( - "secrets_validate", False - ), - ) - if detect_secrets_output: - all_secrets.extend(detect_secrets_output) - secrets_found.extend( - [ - f"{secret['type']} in argument {arg_name}" - for secret in detect_secrets_output - ] - ) - - if secrets_found: - report.status = "FAIL" - report.status_extended = f"Potential secrets found in Glue job {job.name} default arguments: {', '.join(secrets_found)}." - annotate_verified_secrets(report, all_secrets) - - findings.append(report) - - return findings +import json + +from prowler.lib.check.models import Check, Check_Report_AWS +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) +from prowler.providers.aws.services.glue.glue_client import glue_client + + +class glue_etl_jobs_no_secrets_in_arguments(Check): + """Check if Glue ETL jobs have secrets in their default arguments. + + Scans the DefaultArguments of each Glue job for hardcoded credentials, + tokens, passwords, and other sensitive values that should be stored in + Secrets Manager or Parameter Store instead. + """ + + def execute(self): + findings = [] + secrets_ignore_patterns = glue_client.audit_config.get( + "secrets_ignore_patterns", [] + ) + validate = glue_client.audit_config.get("secrets_validate", False) + jobs = list(glue_client.jobs) + + # Collect every default argument across all jobs and scan them in batched + # Kingfisher invocations instead of one subprocess per argument. Findings + # are keyed by (job index, argument name). 
+ def payloads(): + for job_index, job in enumerate(jobs): + if job.arguments: + for arg_name, arg_value in job.arguments.items(): + yield (job_index, arg_name), json.dumps({arg_name: arg_value}) + + batch_results = detect_secrets_scan_batch( + payloads(), excluded_secrets=secrets_ignore_patterns, validate=validate + ) + + for job_index, job in enumerate(jobs): + report = Check_Report_AWS(metadata=self.metadata(), resource=job) + report.status = "PASS" + report.status_extended = ( + f"No secrets found in Glue job {job.name} default arguments." + ) + + if job.arguments: + secrets_found = [] + all_secrets = [] + for arg_name in job.arguments: + detect_secrets_output = batch_results.get((job_index, arg_name)) + if detect_secrets_output: + all_secrets.extend(detect_secrets_output) + secrets_found.extend( + [ + f"{secret['type']} in argument {arg_name}" + for secret in detect_secrets_output + ] + ) + + if secrets_found: + report.status = "FAIL" + report.status_extended = f"Potential secrets found in Glue job {job.name} default arguments: {', '.join(secrets_found)}." 
+ annotate_verified_secrets(report, all_secrets) + + findings.append(report) + + return findings diff --git a/prowler/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets.py b/prowler/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets.py index 09bec239fb6..0fac7dd5880 100644 --- a/prowler/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets.py +++ b/prowler/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets.py @@ -1,7 +1,10 @@ import json from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.aws.services.ssm.ssm_client import ssm_client @@ -11,7 +14,21 @@ def execute(self): secrets_ignore_patterns = ssm_client.audit_config.get( "secrets_ignore_patterns", [] ) - for document in ssm_client.documents.values(): + validate = ssm_client.audit_config.get("secrets_validate", False) + documents = list(ssm_client.documents.values()) + + # Collect one payload per document (its content) and scan them all in + # batched Kingfisher invocations instead of one subprocess per document. 
+ def payloads(): + for index, document in enumerate(documents): + if document.content: + yield index, json.dumps(document.content, indent=2) + + batch_results = detect_secrets_scan_batch( + payloads(), excluded_secrets=secrets_ignore_patterns, validate=validate + ) + + for index, document in enumerate(documents): report = Check_Report_AWS(metadata=self.metadata(), resource=document) report.status = "PASS" report.status_extended = ( @@ -19,11 +36,7 @@ def execute(self): ) if document.content: - detect_secrets_output = detect_secrets_scan( - data=json.dumps(document.content, indent=2), - excluded_secrets=secrets_ignore_patterns, - validate=ssm_client.audit_config.get("secrets_validate", False), - ) + detect_secrets_output = batch_results.get(index) if detect_secrets_output: secrets_string = ", ".join( [ diff --git a/prowler/providers/aws/services/stepfunctions/stepfunctions_statemachine_no_secrets_in_definition/stepfunctions_statemachine_no_secrets_in_definition.py b/prowler/providers/aws/services/stepfunctions/stepfunctions_statemachine_no_secrets_in_definition/stepfunctions_statemachine_no_secrets_in_definition.py index 72bc06c84f8..eab8c9ec569 100644 --- a/prowler/providers/aws/services/stepfunctions/stepfunctions_statemachine_no_secrets_in_definition/stepfunctions_statemachine_no_secrets_in_definition.py +++ b/prowler/providers/aws/services/stepfunctions/stepfunctions_statemachine_no_secrets_in_definition/stepfunctions_statemachine_no_secrets_in_definition.py @@ -1,5 +1,8 @@ from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.aws.services.stepfunctions.stepfunctions_client import ( stepfunctions_client, ) @@ -13,20 +16,27 @@ def execute(self) -> list[Check_Report_AWS]: secrets_ignore_patterns = stepfunctions_client.audit_config.get( 
"secrets_ignore_patterns", [] ) - for state_machine in stepfunctions_client.state_machines.values(): + validate = stepfunctions_client.audit_config.get("secrets_validate", False) + state_machines = list(stepfunctions_client.state_machines.values()) + + # Collect one payload per state machine (its definition) and scan them + # all in batched Kingfisher invocations instead of one subprocess each. + def payloads(): + for index, state_machine in enumerate(state_machines): + if state_machine.definition: + yield index, state_machine.definition + + batch_results = detect_secrets_scan_batch( + payloads(), excluded_secrets=secrets_ignore_patterns, validate=validate + ) + + for index, state_machine in enumerate(state_machines): report = Check_Report_AWS(metadata=self.metadata(), resource=state_machine) report.status = "PASS" report.status_extended = f"No secrets found in Step Functions state machine {state_machine.name} definition." if state_machine.definition: - detect_secrets_output = detect_secrets_scan( - data=state_machine.definition, - excluded_secrets=secrets_ignore_patterns, - validate=stepfunctions_client.audit_config.get( - "secrets_validate", False - ), - ) - + detect_secrets_output = batch_results.get(index) if detect_secrets_output: secrets_string = ", ".join( [ diff --git a/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.py b/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.py index e0cee0bc735..d1069953d07 100644 --- a/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.py +++ b/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.py @@ -2,7 +2,10 @@ from typing import List from 
prowler.lib.check.models import Check, CheckReportOpenStack -from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.openstack.services.blockstorage.blockstorage_client import ( blockstorage_client, ) @@ -16,30 +19,28 @@ def execute(self) -> List[CheckReportOpenStack]: secrets_ignore_patterns = blockstorage_client.audit_config.get( "secrets_ignore_patterns", [] ) + validate = blockstorage_client.audit_config.get("secrets_validate", False) + snapshots = list(blockstorage_client.snapshots) + + # Collect one payload per snapshot (its metadata) and scan them all in + # batched Kingfisher invocations instead of one subprocess per snapshot. + def payloads(): + for index, snapshot in enumerate(snapshots): + if snapshot.metadata: + yield index, json.dumps(dict(snapshot.metadata), indent=2) - for snapshot in blockstorage_client.snapshots: + batch_results = detect_secrets_scan_batch( + payloads(), excluded_secrets=secrets_ignore_patterns, validate=validate + ) + + for index, snapshot in enumerate(snapshots): report = CheckReportOpenStack(metadata=self.metadata(), resource=snapshot) report.status = "PASS" report.status_extended = f"Snapshot {snapshot.name} ({snapshot.id}) metadata does not contain sensitive data." 
if snapshot.metadata: - # Build metadata dict and parallel list of keys - dump_metadata = {} - original_metadata_keys = [] - for key, value in snapshot.metadata.items(): - dump_metadata[key] = value - original_metadata_keys.append(key) - - # Convert metadata dict to JSON string for detect-secrets scanning - metadata_json = json.dumps(dump_metadata, indent=2) - detect_secrets_output = detect_secrets_scan( - data=metadata_json, - excluded_secrets=secrets_ignore_patterns, - validate=blockstorage_client.audit_config.get( - "secrets_validate", False - ), - ) - + original_metadata_keys = list(snapshot.metadata.keys()) + detect_secrets_output = batch_results.get(index) if detect_secrets_output: # Map line numbers back to metadata keys using the parallel list # Line numbering: line 1 = "{", line 2 = first key-value, etc. diff --git a/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.py b/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.py index dd95d28ec80..968d1c1ac58 100644 --- a/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.py +++ b/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.py @@ -2,7 +2,10 @@ from typing import List from prowler.lib.check.models import Check, CheckReportOpenStack -from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.openstack.services.blockstorage.blockstorage_client import ( blockstorage_client, ) @@ -16,30 +19,28 @@ def execute(self) -> List[CheckReportOpenStack]: secrets_ignore_patterns = blockstorage_client.audit_config.get( 
"secrets_ignore_patterns", [] ) + validate = blockstorage_client.audit_config.get("secrets_validate", False) + volumes = list(blockstorage_client.volumes) + + # Collect one payload per volume (its metadata) and scan them all in + # batched Kingfisher invocations instead of one subprocess per volume. + def payloads(): + for index, volume in enumerate(volumes): + if volume.metadata: + yield index, json.dumps(dict(volume.metadata), indent=2) - for volume in blockstorage_client.volumes: + batch_results = detect_secrets_scan_batch( + payloads(), excluded_secrets=secrets_ignore_patterns, validate=validate + ) + + for index, volume in enumerate(volumes): report = CheckReportOpenStack(metadata=self.metadata(), resource=volume) report.status = "PASS" report.status_extended = f"Volume {volume.name} ({volume.id}) metadata does not contain sensitive data." if volume.metadata: - # Build metadata dict and parallel list of keys - dump_metadata = {} - original_metadata_keys = [] - for key, value in volume.metadata.items(): - dump_metadata[key] = value - original_metadata_keys.append(key) - - # Convert metadata dict to JSON string for detect-secrets scanning - metadata_json = json.dumps(dump_metadata, indent=2) - detect_secrets_output = detect_secrets_scan( - data=metadata_json, - excluded_secrets=secrets_ignore_patterns, - validate=blockstorage_client.audit_config.get( - "secrets_validate", False - ), - ) - + original_metadata_keys = list(volume.metadata.keys()) + detect_secrets_output = batch_results.get(index) if detect_secrets_output: # Map line numbers back to metadata keys using the parallel list # Line numbering: line 1 = "{", line 2 = first key-value, etc. 
diff --git a/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.py b/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.py index 8a0b9740db6..bfca160b916 100644 --- a/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.py +++ b/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.py @@ -2,7 +2,10 @@ from typing import List from prowler.lib.check.models import Check, CheckReportOpenStack -from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.openstack.services.compute.compute_client import compute_client @@ -14,28 +17,28 @@ def execute(self) -> List[CheckReportOpenStack]: secrets_ignore_patterns = compute_client.audit_config.get( "secrets_ignore_patterns", [] ) + validate = compute_client.audit_config.get("secrets_validate", False) + instances = list(compute_client.instances) + + # Collect one payload per instance (its metadata) and scan them all in + # batched Kingfisher invocations instead of one subprocess per instance. + def payloads(): + for index, instance in enumerate(instances): + if instance.metadata: + yield index, json.dumps(dict(instance.metadata), indent=2) + + batch_results = detect_secrets_scan_batch( + payloads(), excluded_secrets=secrets_ignore_patterns, validate=validate + ) - for instance in compute_client.instances: + for index, instance in enumerate(instances): report = CheckReportOpenStack(metadata=self.metadata(), resource=instance) report.status = "PASS" report.status_extended = f"Instance {instance.name} ({instance.id}) metadata does not contain sensitive data." 
if instance.metadata: - # Build metadata dict and parallel list of keys (similar to AWS ECS pattern) - dump_metadata = {} - original_metadata_keys = [] - for key, value in instance.metadata.items(): - dump_metadata[key] = value - original_metadata_keys.append(key) - - # Convert metadata dict to JSON string for detect-secrets scanning - metadata_json = json.dumps(dump_metadata, indent=2) - detect_secrets_output = detect_secrets_scan( - data=metadata_json, - excluded_secrets=secrets_ignore_patterns, - validate=compute_client.audit_config.get("secrets_validate", False), - ) - + original_metadata_keys = list(instance.metadata.keys()) + detect_secrets_output = batch_results.get(index) if detect_secrets_output: # Map line numbers back to metadata keys using the parallel list # Line numbering: line 1 = "{", line 2 = first key-value, etc. diff --git a/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.py b/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.py index ae7b1fc00e4..920a807945b 100644 --- a/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.py +++ b/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.py @@ -2,7 +2,10 @@ from typing import List from prowler.lib.check.models import Check, CheckReportOpenStack -from prowler.lib.utils.utils import annotate_verified_secrets, detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.openstack.services.objectstorage.objectstorage_client import ( objectstorage_client, ) @@ -16,8 +19,21 @@ def execute(self) -> List[CheckReportOpenStack]: secrets_ignore_patterns 
= objectstorage_client.audit_config.get( "secrets_ignore_patterns", [] ) + validate = objectstorage_client.audit_config.get("secrets_validate", False) + containers = list(objectstorage_client.containers) + + # Collect one payload per container (its metadata) and scan them all in + # batched Kingfisher invocations instead of one subprocess per container. + def payloads(): + for index, container in enumerate(containers): + if container.metadata: + yield index, json.dumps(dict(container.metadata), indent=2) + + batch_results = detect_secrets_scan_batch( + payloads(), excluded_secrets=secrets_ignore_patterns, validate=validate + ) - for container in objectstorage_client.containers: + for index, container in enumerate(containers): report = CheckReportOpenStack(metadata=self.metadata(), resource=container) report.status = "PASS" report.status_extended = ( @@ -25,23 +41,8 @@ def execute(self) -> List[CheckReportOpenStack]: ) if container.metadata: - # Build metadata dict and parallel list of keys - dump_metadata = {} - original_metadata_keys = [] - for key, value in container.metadata.items(): - dump_metadata[key] = value - original_metadata_keys.append(key) - - # Convert metadata dict to JSON string for detect-secrets scanning - metadata_json = json.dumps(dump_metadata, indent=2) - detect_secrets_output = detect_secrets_scan( - data=metadata_json, - excluded_secrets=secrets_ignore_patterns, - validate=objectstorage_client.audit_config.get( - "secrets_validate", False - ), - ) - + original_metadata_keys = list(container.metadata.keys()) + detect_secrets_output = batch_results.get(index) if detect_secrets_output: # Map line numbers back to metadata keys using the parallel list # Line numbering: line 1 = "{", line 2 = first key-value, etc. 
diff --git a/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data_test.py b/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data_test.py index 02763e5ea3a..eee9a3a0a94 100644 --- a/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data_test.py +++ b/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data_test.py @@ -365,16 +365,18 @@ def test_one_ec2_with_verified_secret(self): new=EC2(aws_provider), ), mock.patch( - "prowler.providers.aws.services.ec2.ec2_instance_secrets_user_data.ec2_instance_secrets_user_data.detect_secrets_scan", - return_value=[ - { - "type": "JSON Web Token (base64url-encoded)", - "line_number": 1, - "filename": "data", - "hashed_secret": "x", - "is_verified": True, - } - ], + "prowler.providers.aws.services.ec2.ec2_instance_secrets_user_data.ec2_instance_secrets_user_data.detect_secrets_scan_batch", + return_value={ + 0: [ + { + "type": "JSON Web Token (base64url-encoded)", + "line_number": 1, + "filename": "data", + "hashed_secret": "x", + "is_verified": True, + } + ] + }, ), ): from prowler.providers.aws.services.ec2.ec2_instance_secrets_user_data.ec2_instance_secrets_user_data import ( From 92262a6eb4773ec0fd8e2f9c021baf5636d8c62f Mon Sep 17 00:00:00 2001 From: Daniel Barranquero Date: Thu, 25 Jun 2026 12:31:38 +0200 Subject: [PATCH 12/20] docs: document the batched secret-scanning check structure --- docs/developer-guide/checks.mdx | 2 + .../secret-scanning-checks.mdx | 112 ++++++++++++++++++ docs/docs.json | 1 + 3 files changed, 115 insertions(+) create mode 100644 docs/developer-guide/secret-scanning-checks.mdx diff --git a/docs/developer-guide/checks.mdx b/docs/developer-guide/checks.mdx index 2f0f45cd96b..5334799d362 100644 --- a/docs/developer-guide/checks.mdx +++ b/docs/developer-guide/checks.mdx @@ -445,3 +445,5 @@ The metadata structure is enforced in code using a 
Pydantic model. For reference ## Specific Check Patterns Details for specific providers can be found in documentation pages named using the pattern `-details`. + +Checks that scan resources for plaintext secrets follow a dedicated batched structure. Refer to [Secret-Scanning Checks](/developer-guide/secret-scanning-checks) before creating or updating one. diff --git a/docs/developer-guide/secret-scanning-checks.mdx b/docs/developer-guide/secret-scanning-checks.mdx new file mode 100644 index 00000000000..adeed77e8d8 --- /dev/null +++ b/docs/developer-guide/secret-scanning-checks.mdx @@ -0,0 +1,112 @@ +--- +title: 'Secret-Scanning Checks' +--- + +Prowler scans audited resources for plaintext secrets using [Kingfisher](https://github.com/mongodb/kingfisher), an open-source secret-scanning engine that Prowler invokes as a subprocess. This guide explains the structure every secret-scanning check must follow to keep scanning correct and efficient on large accounts. + +## Overview + +Secret detection runs through two helpers in `prowler/lib/utils/utils.py`: + +- **`detect_secrets_scan(data=..., file=..., excluded_secrets=..., validate=...)`** scans a single payload and returns a list of finding dictionaries (or `None`). Reserve it for one-off scans. +- **`detect_secrets_scan_batch(payloads, excluded_secrets=..., validate=...)`** scans many payloads in chunked subprocess invocations and returns a `{key: [findings]}` dictionary. This is the standard helper for checks. + +Every Kingfisher invocation carries a fixed process-startup cost (around 100 ms). Calling `detect_secrets_scan` once per resource spawns thousands of subprocesses on large accounts (for example, thousands of CloudWatch log groups). `detect_secrets_scan_batch` amortizes that cost: it writes each payload to a temporary file as it consumes them, runs one subprocess per chunk (500 payloads by default), and maps the findings back to each payload by key. 
+ +## The Batched Structure + +Every secret-scanning check follows three phases. + +### Phase 1: Collect + +Define a generator that yields `(key, payload)` for each scannable unit. The generator builds payload strings only — it does not call Kingfisher. Lazy yielding keeps memory and temporary-disk usage bounded to a single chunk, which matters when an account holds thousands of resources. + +### Phase 2: Batch + +Call `detect_secrets_scan_batch` once with the generator. The helper consumes it in chunks, runs Kingfisher per chunk, and returns the keys that produced findings mapped to their finding lists. + +### Phase 3: Report + +Iterate the resources, look up the findings by key, and build one report per resource. + +```python +from prowler.lib.check.models import Check, Check_Report_AWS +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) +from prowler.providers.aws.services.example.example_client import example_client + + +class example_resource_no_secrets(Check): + def execute(self): + findings = [] + excluded = example_client.audit_config.get("secrets_ignore_patterns", []) + validate = example_client.audit_config.get("secrets_validate", False) + resources = list(example_client.resources) + + # Phase 1: collect — builds strings only, no scan. + def payloads(): + for index, resource in enumerate(resources): + if resource.scannable_data: + yield index, serialize(resource) + + # Phase 2: batch — one call, chunked subprocesses. + batch_results = detect_secrets_scan_batch( + payloads(), excluded_secrets=excluded, validate=validate + ) + + # Phase 3: report — look up findings by key. + for index, resource in enumerate(resources): + report = Check_Report_AWS(metadata=self.metadata(), resource=resource) + report.status = "PASS" + report.status_extended = f"No secrets found in {resource.name}." 
+ detect_secrets_output = batch_results.get(index) + if detect_secrets_output: + report.status = "FAIL" + report.status_extended = ( + f"Potential secret found in {resource.name} -> ..." + ) + annotate_verified_secrets(report, detect_secrets_output) + findings.append(report) + + return findings +``` + +## Choosing the Key + +The key maps each finding back to its source. Two shapes cover every check: + +- **One payload per resource:** use the resource index. This fits checks that serialize a single payload per resource, such as launch configurations, CloudFormation outputs, SSM documents, Step Functions definitions, and OpenStack metadata. +- **Several payloads per resource:** use a `(resource_index, fragment)` tuple, where the fragment identifies the variable, log stream, container, file, or version. Phase 3 groups the per-fragment findings to build the resource report. This fits CloudWatch log streams, ECS containers, CodeBuild variables, Glue arguments, and Lambda code files. + +Derive the indices from the same `list(...)` of resources in both Phase 1 and Phase 3 so the order stays stable and the keys align. + +## Preserving Per-Payload Results + +`detect_secrets_scan_batch` runs Kingfisher with `--no-dedup`, so a secret that appears in more than one payload is reported for each one. This reproduces the result of scanning each payload individually. Build payload strings exactly as a single scan would: serialize the same data and keep line ordering, because messages often map a finding's `line_number` back to a variable name or metadata key. + +## Validation and Severity + +`detect_secrets_scan_batch` accepts `validate`, read from `secrets_validate` in the provider configuration or the `--scan-secrets-validate` flag. When enabled, Kingfisher confirms whether each secret is live, and confirmed secrets carry `is_verified: True`. + +After marking a report as `FAIL`, pass the findings to `annotate_verified_secrets(report, findings)`.
When any secret is verified, the helper escalates the finding to critical severity and appends a note that the secret was confirmed live. Validation stays off by default because it sends the discovered secret to the provider API. + +## Excluded Secrets + +`detect_secrets_scan_batch` applies `secrets_ignore_patterns` — regular expressions from the provider configuration — against each finding's source line and drops the matches, mirroring single-scan behavior. + +## Testing + +To assert on the verified-secret path, mock `detect_secrets_scan_batch` in the check module and return the keyed dictionary. For a single resource scanned at index `0`: + +```python +mock.patch( + "prowler.providers.aws.services.example.example_resource_no_secrets.example_resource_no_secrets.detect_secrets_scan_batch", + return_value={ + 0: [{"type": "...", "line_number": 1, "is_verified": True}] + }, +) +``` + +Most tests need no mock at all: they seed resources that contain example secrets and assert on the `FAIL` status and message, which exercises the real batched path. Refer to the [Testing](/developer-guide/unit-testing) documentation for the general structure. 
diff --git a/docs/docs.json b/docs/docs.json index 5b88b5d2dae..b46e83eee0c 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -396,6 +396,7 @@ "developer-guide/provider", "developer-guide/services", "developer-guide/checks", + "developer-guide/secret-scanning-checks", "developer-guide/outputs", "developer-guide/integrations", "developer-guide/security-compliance-framework", From 6be7e017fa983e9acab94c40df87417d968228f3 Mon Sep 17 00:00:00 2001 From: Daniel Barranquero Date: Thu, 25 Jun 2026 13:17:55 +0200 Subject: [PATCH 13/20] perf(sdk): batch the cloudwatch multiline-event rescan --- ...cloudwatch_log_group_no_secrets_in_logs.py | 210 ++++++++++-------- 1 file changed, 114 insertions(+), 96 deletions(-) diff --git a/prowler/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs.py b/prowler/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs.py index 2976d39a6cc..a790774a5cc 100644 --- a/prowler/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs.py +++ b/prowler/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs.py @@ -3,7 +3,6 @@ from prowler.lib.check.models import Check, Check_Report_AWS from prowler.lib.utils.utils import ( annotate_verified_secrets, - detect_secrets_scan, detect_secrets_scan_batch, ) from prowler.providers.aws.services.cloudwatch.cloudwatch_service import ( @@ -15,107 +14,126 @@ class cloudwatch_log_group_no_secrets_in_logs(Check): def execute(self): findings = [] - if logs_client.log_groups: - secrets_ignore_patterns = logs_client.audit_config.get( - "secrets_ignore_patterns", [] - ) - validate = logs_client.audit_config.get("secrets_validate", False) + if not logs_client.log_groups: + return findings - # Scan every (log group, log stream) in batched Kingfisher - # invocations instead of one 
subprocess per stream. The payloads are - # yielded lazily so only a chunk's worth is ever written to disk / - # held in memory at a time, which matters for accounts with very - # large numbers of log groups/streams. - def stream_payloads(): - for log_group in logs_client.log_groups.values(): - if not log_group.log_streams: - continue - for log_stream_name, events in log_group.log_streams.items(): - yield ( - (log_group.name, log_stream_name), - "\n".join(dumps(event["message"]) for event in events), - ) - - batch_results = detect_secrets_scan_batch( - stream_payloads(), - excluded_secrets=secrets_ignore_patterns, - validate=validate, - ) + secrets_ignore_patterns = logs_client.audit_config.get( + "secrets_ignore_patterns", [] + ) + validate = logs_client.audit_config.get("secrets_validate", False) + # Phase 1: batch-scan every (log group, log stream). Payloads are yielded + # lazily so only a chunk is written/held at a time, which matters for + # accounts with very large numbers of log groups/streams. + def stream_payloads(): for log_group in logs_client.log_groups.values(): - report = Check_Report_AWS(metadata=self.metadata(), resource=log_group) - report.status = "PASS" - report.status_extended = ( - f"No secrets found in {log_group.name} log group." - ) - log_group_secrets = [] - all_secrets = [] - if log_group.log_streams: - for log_stream_name in log_group.log_streams: - log_stream_secrets_output = batch_results.get( - (log_group.name, log_stream_name) + if not log_group.log_streams: + continue + for log_stream_name, events in log_group.log_streams.items(): + yield ( + (log_group.name, log_stream_name), + "\n".join(dumps(event["message"]) for event in events), + ) + + stream_results = detect_secrets_scan_batch( + stream_payloads(), + excluded_secrets=secrets_ignore_patterns, + validate=validate, + ) + + # Phase 2: plan the per-timestamp secrets for each flagged stream and + # collect the multiline events to rescan. 
Each multiline event is + # rescanned once (keyed by timestamp) to resolve per-line detail; the + # rescans are batched in Phase 3 instead of one subprocess per event. + stream_plans = {} # (group, stream) -> {timestamp: {"multiline", "types"}} + rescan_payloads = {} # (group, stream, timestamp) -> multiline event data + for log_group in logs_client.log_groups.values(): + for log_stream_name in log_group.log_streams or {}: + stream_secrets = stream_results.get((log_group.name, log_stream_name)) + if not stream_secrets: + continue + events = log_group.log_streams[log_stream_name] + plan = {} + for secret in stream_secrets: + flagged_event = events[secret["line_number"] - 1] + cloudwatch_timestamp = convert_to_cloudwatch_timestamp_format( + flagged_event["timestamp"] + ) + try: + log_event_data = dumps( + loads(flagged_event["message"]), indent=2 ) - if not log_stream_secrets_output: - continue - log_stream_secrets = {} - all_secrets.extend(log_stream_secrets_output) - for secret in log_stream_secrets_output: - flagged_event = log_group.log_streams[log_stream_name][ - secret["line_number"] - 1 - ] - cloudwatch_timestamp = ( - convert_to_cloudwatch_timestamp_format( - flagged_event["timestamp"] - ) - ) - if cloudwatch_timestamp not in log_stream_secrets.keys(): - log_stream_secrets[cloudwatch_timestamp] = SecretsDict() + except Exception: + log_event_data = dumps(flagged_event["message"], indent=2) + multiline = len(log_event_data.split("\n")) > 1 + if cloudwatch_timestamp not in plan: + plan[cloudwatch_timestamp] = { + "multiline": multiline, + "types": [], + } + if multiline: + # More informative output is possible with more than one + # line: the event is rescanned to get the type and line + # number of each secret. 
+ rescan_payloads[ + (log_group.name, log_stream_name, cloudwatch_timestamp) + ] = log_event_data + else: + plan[cloudwatch_timestamp]["types"].append(secret["type"]) + stream_plans[(log_group.name, log_stream_name)] = plan - try: - log_event_data = dumps( - loads(flagged_event["message"]), indent=2 - ) - except Exception: - log_event_data = dumps( - flagged_event["message"], indent=2 - ) - if len(log_event_data.split("\n")) > 1: - # Can get more informative output if there is more than 1 line. - # Will rescan just this event to get the type of secret and the line number. - # Validation is disabled here: this rescan only resolves line numbers - # for display and must not re-authenticate the secret. - event_detect_secrets_output = detect_secrets_scan( - data=log_event_data, - validate=False, - ) - if event_detect_secrets_output: - for event_secret in event_detect_secrets_output: - log_stream_secrets[ - cloudwatch_timestamp - ].add_secret( - event_secret["line_number"], - event_secret["type"], - ) - else: - log_stream_secrets[cloudwatch_timestamp].add_secret( - 1, secret["type"] - ) - if log_stream_secrets: - secrets_string = "; ".join( - [ - f"at {timestamp} - {log_stream_secrets[timestamp].to_string()}" - for timestamp in log_stream_secrets - ] - ) - log_group_secrets.append( - f"in log stream {log_stream_name} {secrets_string}" + # Phase 3: one batched rescan for all multiline flagged events. Validation + # is never enabled here: this rescan only resolves line numbers for + # display and must not re-authenticate the secret. + rescan_results = ( + detect_secrets_scan_batch(rescan_payloads) if rescan_payloads else {} + ) + + # Phase 4: assemble one report per log group. + for log_group in logs_client.log_groups.values(): + report = Check_Report_AWS(metadata=self.metadata(), resource=log_group) + report.status = "PASS" + report.status_extended = f"No secrets found in {log_group.name} log group." 
+ log_group_secrets = [] + all_secrets = [] + for log_stream_name in log_group.log_streams or {}: + stream_secrets = stream_results.get((log_group.name, log_stream_name)) + if not stream_secrets: + continue + all_secrets.extend(stream_secrets) + log_stream_secrets = {} + for cloudwatch_timestamp, entry in stream_plans[ + (log_group.name, log_stream_name) + ].items(): + secrets_dict = SecretsDict() + if entry["multiline"]: + for event_secret in rescan_results.get( + (log_group.name, log_stream_name, cloudwatch_timestamp), + [], + ): + secrets_dict.add_secret( + event_secret["line_number"], event_secret["type"] ) - if log_group_secrets: - secrets_string = "; ".join(log_group_secrets) - report.status = "FAIL" - report.status_extended = f"Potential secrets found in log group {log_group.name} {secrets_string}." - annotate_verified_secrets(report, all_secrets) - findings.append(report) + else: + for secret_type in entry["types"]: + secrets_dict.add_secret(1, secret_type) + log_stream_secrets[cloudwatch_timestamp] = secrets_dict + if log_stream_secrets: + secrets_string = "; ".join( + [ + f"at {timestamp} - {log_stream_secrets[timestamp].to_string()}" + for timestamp in log_stream_secrets + ] + ) + log_group_secrets.append( + f"in log stream {log_stream_name} {secrets_string}" + ) + if log_group_secrets: + secrets_string = "; ".join(log_group_secrets) + report.status = "FAIL" + report.status_extended = f"Potential secrets found in log group {log_group.name} {secrets_string}." 
+ annotate_verified_secrets(report, all_secrets) + findings.append(report) return findings From 388e47175b84e1ae4e84c5696e1aa82df48a21b4 Mon Sep 17 00:00:00 2001 From: Daniel Barranquero Date: Thu, 25 Jun 2026 13:17:58 +0200 Subject: [PATCH 14/20] refactor(sdk): remove unused single-payload detect_secrets_scan helper --- .../secret-scanning-checks.mdx | 7 +- prowler/lib/utils/utils.py | 135 +++--------------- tests/lib/utils/utils_test.py | 111 +------------- 3 files changed, 22 insertions(+), 231 deletions(-) diff --git a/docs/developer-guide/secret-scanning-checks.mdx b/docs/developer-guide/secret-scanning-checks.mdx index adeed77e8d8..79794bb5aa9 100644 --- a/docs/developer-guide/secret-scanning-checks.mdx +++ b/docs/developer-guide/secret-scanning-checks.mdx @@ -6,12 +6,11 @@ Prowler scans audited resources for plaintext secrets using [Kingfisher](https:/ ## Overview -Secret detection runs through two helpers in `prowler/lib/utils/utils.py`: +Secret detection runs through a single helper in `prowler/lib/utils/utils.py`: -- **`detect_secrets_scan(data=..., file=..., excluded_secrets=..., validate=...)`** scans a single payload and returns a list of finding dictionaries (or `None`). Reserve it for one-off scans. -- **`detect_secrets_scan_batch(payloads, excluded_secrets=..., validate=...)`** scans many payloads in chunked subprocess invocations and returns a `{key: [findings]}` dictionary. This is the standard helper for checks. +- **`detect_secrets_scan_batch(payloads, excluded_secrets=..., validate=...)`** scans many payloads in chunked subprocess invocations and returns a `{key: [findings]}` dictionary. To scan a single payload, pass a one-entry mapping (for example, `{0: data}`). -Every Kingfisher invocation carries a fixed process-startup cost (around 100 ms). Calling `detect_secrets_scan` once per resource spawns thousands of subprocesses on large accounts (for example, thousands of CloudWatch log groups). 
`detect_secrets_scan_batch` amortizes that cost: it writes each payload to a temporary file as it consumes them, runs one subprocess per chunk (500 payloads by default), and maps the findings back to each payload by key. +Every Kingfisher invocation carries a fixed process-startup cost (around 100 ms). Scanning once per resource would spawn thousands of subprocesses on large accounts (for example, thousands of CloudWatch log groups). `detect_secrets_scan_batch` amortizes that cost: it writes each payload to a temporary file as it consumes them, runs one subprocess per chunk (500 payloads by default), and maps the findings back to each payload by key. ## The Batched Structure diff --git a/prowler/lib/utils/utils.py b/prowler/lib/utils/utils.py index 3963ef9f1c0..c7d6ffccb6a 100644 --- a/prowler/lib/utils/utils.py +++ b/prowler/lib/utils/utils.py @@ -160,114 +160,6 @@ def hash_sha512(string: str) -> str: return sha512(string.encode(encoding_format_utf_8)).hexdigest()[0:9] -def detect_secrets_scan( - data: str = None, - file=None, - excluded_secrets: list[str] = None, - detect_secrets_plugins: dict = None, - confidence: str = default_secrets_confidence, - validate: bool = False, -) -> list[dict[str, str]]: - """detect_secrets_scan scans the data or file for secrets using Kingfisher. - - By default the scan runs fully offline (`--no-validate`, `--no-update-check`): - no network calls are made, so the scanned data is never sent anywhere. - Kingfisher's built-in ruleset is used at "low" confidence so its generic - keyword rules fire (see ``default_secrets_confidence``). - - When ``validate`` is True, Kingfisher additionally checks whether each - discovered secret is live by authenticating with it against the provider's - API (the secret itself is used as the credential; no extra permissions are - required). This makes outbound network calls and the discovered credential - is exercised against the provider, so it must be explicitly opted in. 
- - Args: - data (str): The data to scan for secrets. - file (str): The file to scan for secrets. - excluded_secrets (list): A list of regex patterns; any finding whose - source line matches one of them is excluded from the results. - detect_secrets_plugins (dict): Deprecated. Kept for backwards - compatibility with existing call sites; ignored by Kingfisher. - confidence (str): Minimum Kingfisher confidence to report ("low", - "medium" or "high"). Defaults to ``default_secrets_confidence``. - validate (bool): When True, validate discovered secrets against the - provider APIs (live check). Makes outbound network calls. Defaults - to False (fully offline). - Returns: - list[dict] | None: A list of findings, each with ``filename``, - ``line_number``, ``type``, ``hashed_secret`` and ``is_verified`` - keys, or ``None`` when no secrets are found or an error occurs. - ``is_verified`` is True only when ``validate`` is True and the - secret was confirmed live. - Examples: - >>> detect_secrets_scan(data='password = "Tr0ub4dor&3xKq9vLmZ"') - [{'filename': '/tmp/...', 'line_number': 1, 'type': 'Generic Password', 'hashed_secret': '...', 'is_verified': False}] - """ - if detect_secrets_plugins is not None: - logger.debug( - "detect_secrets_plugins is deprecated and ignored when scanning with Kingfisher." - ) - - temp_data_file = None - temp_output_file = None - try: - if file: - scan_path = file - else: - # Ensure a trailing newline: Kingfisher does not scan the final line - # of a file when it is not newline-terminated, and serialized payloads - # (JSON dumps, joined log events, state-machine definitions) often are - # not. Appending "\n" does not change line numbers or secret content. 
- content = data if data.endswith("\n") else data + "\n" - temp_data_file = tempfile.NamedTemporaryFile(delete=False, suffix=".txt") - temp_data_file.write(bytes(content, encoding="raw_unicode_escape")) - temp_data_file.close() - scan_path = temp_data_file.name - - temp_output_file = tempfile.NamedTemporaryFile(delete=False, suffix=".json") - temp_output_file.close() - - command = _build_kingfisher_command( - [scan_path], temp_output_file.name, confidence, validate - ) - process = subprocess.run(command, capture_output=True, text=True) - if process.returncode not in _kingfisher_success_exit_codes: - logger.error( - f"Error scanning for secrets: Kingfisher exited with code " - f"{process.returncode}: {process.stderr.strip()[:500]}" - ) - return None - - with open(temp_output_file.name, encoding=encoding_format_utf_8) as f: - output = f.read() - kingfisher_output = json.loads(output) if output.strip() else {} - - # Read source lines once to apply excluded_secrets against the full line - # (preserving detect-secrets' should_exclude_line semantics). 
- source_lines = [] - if excluded_secrets: - with open(scan_path, encoding=encoding_format_utf_8, errors="replace") as f: - source_lines = f.read().splitlines() - - findings = [] - for entry in kingfisher_output.get("findings", []): - line_number = entry.get("finding", {}).get("line") - if excluded_secrets and line_number and line_number <= len(source_lines): - line_text = source_lines[line_number - 1] - if any(re.search(pattern, line_text) for pattern in excluded_secrets): - continue - findings.append(_finding_to_dict(entry, scan_path)) - - return findings or None - except Exception as e: - logger.error(f"Error scanning for secrets: {e}") - return None - finally: - for temp_file in (temp_data_file, temp_output_file): - if temp_file and os.path.exists(temp_file.name): - os.remove(temp_file.name) - - def _scan_batch_chunk( chunk: list, excluded_secrets: list, @@ -350,14 +242,22 @@ def detect_secrets_scan_batch( validate: bool = False, chunk_size: int = default_secrets_batch_chunk_size, ) -> dict: - """Scan many in-memory payloads in chunked, single Kingfisher invocations. - - Each payload is written to its own file and scanned with ``--no-dedup`` so - per-payload results match calling ``detect_secrets_scan`` on each payload - individually. Payloads are processed in chunks (writing each to disk and - releasing it as it is consumed) to bound peak temp-disk and memory use while - amortizing the per-process spawn cost across many fragments. This is the - batched equivalent of looping ``detect_secrets_scan`` per fragment. + """Scan many payloads with Kingfisher in chunked subprocess invocations. + + This is the scan entry point used by every secret check. Each payload is + written to its own file and scanned with ``--no-dedup`` so per-payload + results match scanning each payload on its own. 
Payloads are processed in + chunks (writing each to disk and releasing it as it is consumed) to bound + peak temp-disk and memory use while amortizing the per-process spawn cost + across many fragments. + + By default the scan runs fully offline (``--no-validate``, + ``--no-update-check``): no network calls are made, so the scanned data is + never sent anywhere. When ``validate`` is True, Kingfisher additionally + checks whether each discovered secret is live by authenticating with it + against the provider's API (the secret itself is the credential; no extra + permissions are required). That makes outbound network calls, so it must be + explicitly opted in. Args: payloads: a mapping ``{key: data}`` or any iterable of ``(key, data)`` @@ -370,7 +270,8 @@ def detect_secrets_scan_batch( chunk_size (int): payloads scanned per Kingfisher invocation. Returns: dict mapping each key that produced findings to its list of finding - dicts (same shape as ``detect_secrets_scan``). Keys with no findings are + dicts, each with ``filename``, ``line_number``, ``type``, + ``hashed_secret`` and ``is_verified`` keys. Keys with no findings are omitted. 
""" items = payloads.items() if hasattr(payloads, "items") else payloads diff --git a/tests/lib/utils/utils_test.py b/tests/lib/utils/utils_test.py index 88724db9abf..98262a875d5 100644 --- a/tests/lib/utils/utils_test.py +++ b/tests/lib/utils/utils_test.py @@ -4,10 +4,9 @@ from time import mktime import pytest -from mock import Mock, patch +from mock import patch from prowler.lib.utils.utils import ( - detect_secrets_scan, detect_secrets_scan_batch, file_exists, get_file_permissions, @@ -109,114 +108,6 @@ def test_validate_ip_address(self): assert not validate_ip_address("Not an IP") -class Test_detect_secrets_scan: - def test_detect_secrets_scan_data(self): - data = 'password = "Tr0ub4dor3xKq9vLmZ"' - secrets_detected = detect_secrets_scan(data=data, excluded_secrets=[]) - assert type(secrets_detected) is list - assert len(secrets_detected) == 1 - assert "filename" in secrets_detected[0] - assert "hashed_secret" in secrets_detected[0] - assert "is_verified" in secrets_detected[0] - assert secrets_detected[0]["line_number"] == 1 - assert secrets_detected[0]["type"] == "Generic Password" - - def test_detect_secrets_scan_no_secrets_data(self): - data = "" - assert detect_secrets_scan(data=data) is None - - def test_detect_secrets_scan_file_with_secrets(self): - temp_data_file = tempfile.NamedTemporaryFile(delete=False, suffix=".txt") - temp_data_file.write(b'password = "Tr0ub4dor3xKq9vLmZ"\n') - temp_data_file.seek(0) - secrets_detected = detect_secrets_scan( - file=temp_data_file.name, excluded_secrets=[] - ) - assert type(secrets_detected) is list - assert len(secrets_detected) == 1 - assert "filename" in secrets_detected[0] - assert "hashed_secret" in secrets_detected[0] - assert "is_verified" in secrets_detected[0] - assert secrets_detected[0]["line_number"] == 1 - assert secrets_detected[0]["type"] == "Generic Password" - os.remove(temp_data_file.name) - - def test_detect_secrets_scan_file_no_secrets(self): - temp_data_file = 
tempfile.NamedTemporaryFile(delete=False) - temp_data_file.write(b"no secrets") - temp_data_file.seek(0) - assert detect_secrets_scan(file=temp_data_file.name) is None - os.remove(temp_data_file.name) - - def test_detect_secrets_using_regex(self): - data = "MYSQL_ALLOW_EMPTY_PASSWORD=password" - secrets_detected = detect_secrets_scan( - data=data, excluded_secrets=[".*password"] - ) - assert secrets_detected is None - - def test_detect_secrets_using_regex_file(self): - temp_data_file = tempfile.NamedTemporaryFile(delete=False) - temp_data_file.write(b"MYSQL_ALLOW_EMPTY_PASSWORD=password") - temp_data_file.seek(0) - secrets_detected = detect_secrets_scan( - file=temp_data_file.name, excluded_secrets=[".*password"] - ) - assert secrets_detected is None - os.remove(temp_data_file.name) - - def test_detect_secrets_secrets_using_regex(self): - # Two secrets on separate lines; exclude the line with the - # ALLOW_EMPTY_PASSWORD key, leaving only the MYSQL_PASSWORD secret. - data = ( - 'MYSQL_ALLOW_EMPTY_PASSWORD="Tr0ub4dor3xKq9vLmZ"\n' - 'MYSQL_PASSWORD="Xy9zPq2wKmRtVbN4Lm"' - ) - secrets_detected = detect_secrets_scan( - data=data, excluded_secrets=[".*ALLOW_EMPTY_PASSWORD.*"] - ) - assert type(secrets_detected) is list - assert len(secrets_detected) == 1 - assert "filename" in secrets_detected[0] - assert "hashed_secret" in secrets_detected[0] - assert "is_verified" in secrets_detected[0] - assert secrets_detected[0]["line_number"] == 2 - assert secrets_detected[0]["type"] == "Generic Password" - - def test_detect_secrets_scan_offline_by_default(self): - # By default the scan is fully offline: --no-validate is passed and no - # validation flags are added. 
- with ( - patch( - "prowler.lib.utils.utils.get_kingfisher_binary", - return_value="kingfisher", - ), - patch("prowler.lib.utils.utils.subprocess.run") as mock_run, - ): - mock_run.return_value = Mock(returncode=0, stdout="", stderr="") - detect_secrets_scan(data="password = 'value'") - command = mock_run.call_args[0][0] - assert "--no-validate" in command - assert "--validation-timeout" not in command - - def test_detect_secrets_scan_validate_enabled(self): - # With validate=True, --no-validate is dropped and conservative - # validation flags are added. - with ( - patch( - "prowler.lib.utils.utils.get_kingfisher_binary", - return_value="kingfisher", - ), - patch("prowler.lib.utils.utils.subprocess.run") as mock_run, - ): - mock_run.return_value = Mock(returncode=0, stdout="", stderr="") - detect_secrets_scan(data="password = 'value'", validate=True) - command = mock_run.call_args[0][0] - assert "--no-validate" not in command - assert "--validation-timeout" in command - assert "--validation-retries" in command - - class Test_detect_secrets_scan_batch: def test_batch_returns_findings_per_key(self): results = detect_secrets_scan_batch( From c01297a0bb969dff85dd842011c6341bf74550b0 Mon Sep 17 00:00:00 2001 From: Daniel Barranquero Date: Thu, 25 Jun 2026 16:04:39 +0200 Subject: [PATCH 15/20] fix(sdk): bound the kingfisher subprocess with a timeout and type the batch helper --- prowler/lib/utils/utils.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/prowler/lib/utils/utils.py b/prowler/lib/utils/utils.py index c7d6ffccb6a..6d5db87c7d4 100644 --- a/prowler/lib/utils/utils.py +++ b/prowler/lib/utils/utils.py @@ -20,7 +20,7 @@ from ipaddress import ip_address from os.path import exists from time import mktime -from typing import Any, Optional +from typing import Any, Iterable, Mapping, Optional, Union from colorama import Style @@ -43,6 +43,10 @@ # across many fragments (see detect_secrets_scan_batch). 
default_secrets_batch_chunk_size = 500 +# Wall-clock cap (seconds) for a single Kingfisher subprocess, so a hung binary +# cannot block the audit indefinitely. +default_secrets_scan_timeout = 300 + @lru_cache(maxsize=1) def get_kingfisher_binary() -> str: @@ -192,7 +196,12 @@ def _scan_batch_chunk( command = _build_kingfisher_command( [tmp_dir], temp_output_file.name, confidence, validate, no_dedup=True ) - process = subprocess.run(command, capture_output=True, text=True) + process = subprocess.run( + command, + capture_output=True, + text=True, + timeout=default_secrets_scan_timeout, + ) if process.returncode not in _kingfisher_success_exit_codes: logger.error( f"Error scanning for secrets: Kingfisher exited with code " @@ -236,8 +245,8 @@ def _scan_batch_chunk( def detect_secrets_scan_batch( - payloads, - excluded_secrets: list[str] = None, + payloads: Union[Mapping[Any, str], Iterable[tuple[Any, str]]], + excluded_secrets: Optional[list[str]] = None, confidence: str = default_secrets_confidence, validate: bool = False, chunk_size: int = default_secrets_batch_chunk_size, From 0e9731a45ffee46d4421b86e9a0082cdb9e04eff Mon Sep 17 00:00:00 2001 From: Daniel Barranquero Date: Thu, 25 Jun 2026 16:04:43 +0200 Subject: [PATCH 16/20] test(sdk): cover the verified-secret escalation path across secret checks --- ...a_function_no_secrets_in_variables_test.py | 65 +++++++++++++++ ...ld_project_no_secrets_in_variables_test.py | 82 ++++++++++++++++++- .../ec2_instance_secrets_user_data_test.py | 7 +- .../ec2_launch_template_no_secrets_test.py | 75 +++++++++++++++++ ...e_snapshot_metadata_sensitive_data_test.py | 55 +++++++++++++ ...age_volume_metadata_sensitive_data_test.py | 62 ++++++++++++++ ...e_instance_metadata_sensitive_data_test.py | 69 ++++++++++++++++ ..._container_metadata_sensitive_data_test.py | 61 ++++++++++++++ 8 files changed, 473 insertions(+), 3 deletions(-) diff --git 
a/tests/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables_test.py b/tests/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables_test.py index 9eb49b7e578..df8c3a29da7 100644 --- a/tests/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables_test.py +++ b/tests/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables_test.py @@ -240,6 +240,71 @@ def test_function_secrets_in_variables_telegram_token(self): ) assert result[0].resource_tags == [] + def test_function_with_verified_secret(self): + from prowler.lib.check.models import Severity + + lambda_client = mock.MagicMock + function_name = "test-lambda" + function_runtime = "nodejs4.3" + function_arn = f"arn:aws:lambda:{AWS_REGION_US_EAST_1}:{AWS_ACCOUNT_NUMBER}:function/{function_name}" + lambda_client.audit_config = { + "secrets_ignore_patterns": [], + "secrets_validate": True, + } + + lambda_client.functions = { + "function_name": Function( + name=function_name, + security_groups=[], + arn=function_arn, + region=AWS_REGION_US_EAST_1, + runtime=function_runtime, + environment={"db_password": "test-value"}, + ) + } + + with ( + mock.patch( + "prowler.providers.common.provider.Provider.get_global_provider", + return_value=set_mocked_aws_provider( + audit_config={"secrets_validate": True} + ), + ), + mock.patch( + "prowler.providers.aws.services.awslambda.awslambda_function_no_secrets_in_variables.awslambda_function_no_secrets_in_variables.awslambda_client", + new=lambda_client, + ), + mock.patch( + "prowler.providers.aws.services.awslambda.awslambda_function_no_secrets_in_variables.awslambda_function_no_secrets_in_variables.detect_secrets_scan_batch", + return_value={ + 0: [ + { + "type": "JSON Web Token (base64url-encoded)", + "line_number": 2, + "filename": 
"data", + "hashed_secret": "x", + "is_verified": True, + } + ] + }, + ) as mock_scan, + ): + # Test Check + from prowler.providers.aws.services.awslambda.awslambda_function_no_secrets_in_variables.awslambda_function_no_secrets_in_variables import ( + awslambda_function_no_secrets_in_variables, + ) + + check = awslambda_function_no_secrets_in_variables() + result = check.execute() + + # The check must forward secrets_validate from the config to the scan. + assert mock_scan.call_args.kwargs.get("validate") is True + assert len(result) == 1 + assert result[0].status == "FAIL" + assert result[0].check_metadata.Severity == Severity.critical + assert "confirmed to be live" in result[0].status_extended + assert result[0].resource_id == function_name + def test_function_no_secrets_in_variables(self): lambda_client = mock.MagicMock function_name = "test-lambda" diff --git a/tests/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables_test.py b/tests/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables_test.py index 957a1ceb63b..a5df692d593 100644 --- a/tests/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables_test.py +++ b/tests/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables_test.py @@ -1,6 +1,10 @@ from unittest import mock -from tests.providers.aws.utils import AWS_ACCOUNT_NUMBER, AWS_REGION_US_EAST_1 +from tests.providers.aws.utils import ( + AWS_ACCOUNT_NUMBER, + AWS_REGION_US_EAST_1, + set_mocked_aws_provider, +) class Test_codebuild_project_no_secrets_in_variables: @@ -253,6 +257,82 @@ def test_project_with_sensitive_plaintext_credentials(self): assert result[0].resource_arn == project_arn assert result[0].resource_tags == [] + def test_project_with_verified_secret(self): + from prowler.lib.check.models import 
Severity + + codebuild_client = mock.MagicMock() + + from prowler.providers.aws.services.codebuild.codebuild_service import Project + + project_arn = f"arn:aws:codebuild:{AWS_REGION_US_EAST_1}:{AWS_ACCOUNT_NUMBER}:project/SensitiveProject" + codebuild_client.projects = { + project_arn: Project( + name="SensitiveProject", + arn=project_arn, + region=AWS_REGION_US_EAST_1, + last_invoked_time=None, + buildspec=None, + environment_variables=[ + { + "name": "EXAMPLE_VAR", + "value": "ExampleValue", + "type": "PLAINTEXT", + } + ], + tags=[], + ) + } + + codebuild_client.audit_config = { + "excluded_sensitive_environment_variables": [], + "secrets_validate": True, + } + + with ( + mock.patch( + "prowler.providers.common.provider.Provider.get_global_provider", + return_value=set_mocked_aws_provider( + audit_config={"secrets_validate": True} + ), + ), + mock.patch( + "prowler.providers.aws.services.codebuild.codebuild_service.Codebuild", + codebuild_client, + ), + mock.patch( + "prowler.providers.aws.services.codebuild.codebuild_project_no_secrets_in_variables.codebuild_project_no_secrets_in_variables.codebuild_client", + codebuild_client, + ), + mock.patch( + "prowler.providers.aws.services.codebuild.codebuild_project_no_secrets_in_variables.codebuild_project_no_secrets_in_variables.detect_secrets_scan_batch", + return_value={ + (0, 0): [ + { + "type": "JSON Web Token (base64url-encoded)", + "line_number": 1, + "filename": "data", + "hashed_secret": "x", + "is_verified": True, + } + ] + }, + ) as mock_scan, + ): + from prowler.providers.aws.services.codebuild.codebuild_project_no_secrets_in_variables.codebuild_project_no_secrets_in_variables import ( + codebuild_project_no_secrets_in_variables, + ) + + check = codebuild_project_no_secrets_in_variables() + result = check.execute() + + # The check must forward secrets_validate from the config to the scan. 
+ assert mock_scan.call_args.kwargs.get("validate") is True + assert len(result) == 1 + assert result[0].status == "FAIL" + assert result[0].check_metadata.Severity == Severity.critical + assert "confirmed to be live" in result[0].status_extended + assert result[0].resource_id == "SensitiveProject" + def test_project_with_sensitive_plaintext_credentials_exluded(self): codebuild_client = mock.MagicMock diff --git a/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data_test.py b/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data_test.py index eee9a3a0a94..ff6b9bc3e9e 100644 --- a/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data_test.py +++ b/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data_test.py @@ -352,7 +352,8 @@ def test_one_ec2_with_verified_secret(self): from prowler.providers.aws.services.ec2.ec2_service import EC2 aws_provider = set_mocked_aws_provider( - [AWS_REGION_EU_WEST_1, AWS_REGION_US_EAST_1] + [AWS_REGION_EU_WEST_1, AWS_REGION_US_EAST_1], + audit_config={"secrets_validate": True}, ) with ( @@ -377,7 +378,7 @@ def test_one_ec2_with_verified_secret(self): } ] }, - ), + ) as mock_scan, ): from prowler.providers.aws.services.ec2.ec2_instance_secrets_user_data.ec2_instance_secrets_user_data import ( ec2_instance_secrets_user_data, @@ -386,6 +387,8 @@ def test_one_ec2_with_verified_secret(self): check = ec2_instance_secrets_user_data() result = check.execute() + # The check must forward secrets_validate from the config to the scan. 
+ assert mock_scan.call_args.kwargs.get("validate") is True assert len(result) == 1 assert result[0].status == "FAIL" assert result[0].check_metadata.Severity == Severity.critical diff --git a/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets_test.py b/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets_test.py index 3cf7655585f..4803a53993f 100644 --- a/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets_test.py +++ b/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets_test.py @@ -393,6 +393,81 @@ def test_one_launch_template_with_secrets_gzip(self): ) assert result[0].resource_tags == [] + def test_one_launch_template_with_verified_secret(self): + from prowler.lib.check.models import Severity + + ec2_client = mock.MagicMock() + launch_template_name = "tester" + launch_template_id = "lt-1234567890" + launch_template_arn = ( + f"arn:aws:ec2:us-east-1:123456789012:launch-template/{launch_template_id}" + ) + + launch_template_data = TemplateData( + user_data=b64encode( + "This is some user_data".encode(encoding_format_utf_8) + ).decode(encoding_format_utf_8), + associate_public_ip_address=True, + ) + + launch_template_versions = [ + LaunchTemplateVersion( + version_number=1, + template_data=launch_template_data, + ), + ] + + launch_template = LaunchTemplate( + name=launch_template_name, + id=launch_template_id, + arn=launch_template_arn, + region=AWS_REGION_US_EAST_1, + versions=launch_template_versions, + ) + + ec2_client.launch_templates = [launch_template] + ec2_client.audit_config = {"secrets_validate": True} + + with ( + mock.patch( + "prowler.providers.common.provider.Provider.get_global_provider", + return_value=ec2_client, + ), + mock.patch( + "prowler.providers.aws.services.ec2.ec2_launch_template_no_secrets.ec2_launch_template_no_secrets.ec2_client", + new=ec2_client, + ), + mock.patch( 
+ "prowler.providers.aws.services.ec2.ec2_launch_template_no_secrets.ec2_launch_template_no_secrets.detect_secrets_scan_batch", + return_value={ + (0, 0): [ + { + "type": "JSON Web Token (base64url-encoded)", + "line_number": 1, + "filename": "data", + "hashed_secret": "x", + "is_verified": True, + } + ] + }, + ) as mock_scan, + ): + # Test Check + from prowler.providers.aws.services.ec2.ec2_launch_template_no_secrets.ec2_launch_template_no_secrets import ( + ec2_launch_template_no_secrets, + ) + + check = ec2_launch_template_no_secrets() + result = check.execute() + + # The check must forward secrets_validate from the config to the scan. + assert mock_scan.call_args.kwargs.get("validate") is True + assert len(result) == 1 + assert result[0].status == "FAIL" + assert result[0].check_metadata.Severity == Severity.critical + assert "confirmed to be live" in result[0].status_extended + assert result[0].resource_id == launch_template_id + @mock_aws def test_one_launch_template_without_user_data(self): launch_template_name = "tester" diff --git a/tests/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data_test.py b/tests/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data_test.py index 7ed8315dc3e..85b97da0a23 100644 --- a/tests/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data_test.py +++ b/tests/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data_test.py @@ -2,6 +2,7 @@ from unittest import mock +from prowler.lib.check.models import Severity from prowler.providers.openstack.services.blockstorage.blockstorage_service import ( SnapshotResource, ) @@ -352,3 +353,57 @@ def test_snapshot_metadata_key_correct_identification(self): # Verify the secret 
is correctly attributed to 'db_password' key assert "in metadata key 'db_password'" in result[0].status_extended assert result[0].resource_id == "snap-6" + + def test_snapshot_verified_secret_escalates_to_critical(self): + """Test that a confirmed live secret escalates the finding to CRITICAL (FAIL).""" + blockstorage_client = mock.MagicMock() + blockstorage_client.audit_config = {"secrets_validate": True} + blockstorage_client.snapshots = [ + SnapshotResource( + id="snap-verified", + name="Verified Secret", + status="available", + size=50, + volume_id="vol-1", + metadata={"api_key": "placeholder"}, + project_id=OPENSTACK_PROJECT_ID, + region=OPENSTACK_REGION, + ) + ] + + with ( + mock.patch( + "prowler.providers.common.provider.Provider.get_global_provider", + return_value=set_mocked_openstack_provider(), + ), + mock.patch( + "prowler.providers.openstack.services.blockstorage.blockstorage_snapshot_metadata_sensitive_data.blockstorage_snapshot_metadata_sensitive_data.blockstorage_client", + new=blockstorage_client, + ), + mock.patch( + "prowler.providers.openstack.services.blockstorage.blockstorage_snapshot_metadata_sensitive_data.blockstorage_snapshot_metadata_sensitive_data.detect_secrets_scan_batch", + return_value={ + 0: [ + { + "type": "JSON Web Token (base64url-encoded)", + "line_number": 2, + "filename": "data", + "hashed_secret": "x", + "is_verified": True, + } + ] + }, + ) as mock_scan, + ): + from prowler.providers.openstack.services.blockstorage.blockstorage_snapshot_metadata_sensitive_data.blockstorage_snapshot_metadata_sensitive_data import ( + blockstorage_snapshot_metadata_sensitive_data, + ) + + check = blockstorage_snapshot_metadata_sensitive_data() + result = check.execute() + + assert len(result) == 1 + assert result[0].status == "FAIL" + assert result[0].check_metadata.Severity == Severity.critical + assert "confirmed to be live" in result[0].status_extended + assert mock_scan.call_args.kwargs.get("validate") is True diff --git 
a/tests/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data_test.py b/tests/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data_test.py index 5ee9eac49db..80927e2f9dd 100644 --- a/tests/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data_test.py +++ b/tests/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data_test.py @@ -2,6 +2,7 @@ from unittest import mock +from prowler.lib.check.models import Severity from prowler.providers.openstack.services.blockstorage.blockstorage_service import ( VolumeResource, ) @@ -408,3 +409,64 @@ def test_volume_metadata_key_correct_identification(self): # Verify the secret is correctly attributed to 'db_password' key assert "in metadata key 'db_password'" in result[0].status_extended assert result[0].resource_id == "vol-6" + + def test_volume_verified_secret_escalates_to_critical(self): + """Test that a confirmed live secret escalates the finding to CRITICAL (FAIL).""" + blockstorage_client = mock.MagicMock() + blockstorage_client.audit_config = {"secrets_validate": True} + blockstorage_client.volumes = [ + VolumeResource( + id="vol-verified", + name="Verified Secret", + status="in-use", + size=100, + volume_type="standard", + is_encrypted=False, + is_bootable=False, + is_multiattach=False, + attachments=[], + metadata={"api_key": "placeholder"}, + availability_zone="nova", + snapshot_id="", + source_volume_id="", + project_id=OPENSTACK_PROJECT_ID, + region=OPENSTACK_REGION, + ) + ] + + with ( + mock.patch( + "prowler.providers.common.provider.Provider.get_global_provider", + return_value=set_mocked_openstack_provider(), + ), + mock.patch( + 
"prowler.providers.openstack.services.blockstorage.blockstorage_volume_metadata_sensitive_data.blockstorage_volume_metadata_sensitive_data.blockstorage_client", + new=blockstorage_client, + ), + mock.patch( + "prowler.providers.openstack.services.blockstorage.blockstorage_volume_metadata_sensitive_data.blockstorage_volume_metadata_sensitive_data.detect_secrets_scan_batch", + return_value={ + 0: [ + { + "type": "JSON Web Token (base64url-encoded)", + "line_number": 2, + "filename": "data", + "hashed_secret": "x", + "is_verified": True, + } + ] + }, + ) as mock_scan, + ): + from prowler.providers.openstack.services.blockstorage.blockstorage_volume_metadata_sensitive_data.blockstorage_volume_metadata_sensitive_data import ( + blockstorage_volume_metadata_sensitive_data, + ) + + check = blockstorage_volume_metadata_sensitive_data() + result = check.execute() + + assert len(result) == 1 + assert result[0].status == "FAIL" + assert result[0].check_metadata.Severity == Severity.critical + assert "confirmed to be live" in result[0].status_extended + assert mock_scan.call_args.kwargs.get("validate") is True diff --git a/tests/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data_test.py b/tests/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data_test.py index c1c6c99dcd5..790f8aa0972 100644 --- a/tests/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data_test.py +++ b/tests/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data_test.py @@ -2,6 +2,7 @@ from unittest import mock +from prowler.lib.check.models import Severity from prowler.providers.openstack.services.compute.compute_service import ComputeInstance from tests.providers.openstack.openstack_fixtures import ( OPENSTACK_PROJECT_ID, @@ -578,3 +579,71 @@ def 
test_instance_metadata_key_ordering(self): # Verify the secret is correctly attributed to 'api_key' key (second in order) assert "in metadata key 'api_key'" in result[0].status_extended assert result[0].resource_id == "instance-8" + + def test_instance_verified_secret_escalates_to_critical(self): + """Test that a confirmed live secret escalates the finding to CRITICAL (FAIL).""" + compute_client = mock.MagicMock() + compute_client.audit_config = {"secrets_validate": True} + compute_client.instances = [ + ComputeInstance( + id="instance-verified", + name="Verified Secret", + status="ACTIVE", + flavor_id="flavor-1", + security_groups=["default"], + region=OPENSTACK_REGION, + project_id=OPENSTACK_PROJECT_ID, + is_locked=False, + locked_reason="", + key_name="", + user_id="", + access_ipv4="", + access_ipv6="", + public_v4="", + public_v6="", + private_v4="", + private_v6="", + networks={}, + has_config_drive=False, + metadata={"api_key": "placeholder"}, + user_data="", + trusted_image_certificates=[], + ) + ] + + with ( + mock.patch( + "prowler.providers.common.provider.Provider.get_global_provider", + return_value=set_mocked_openstack_provider(), + ), + mock.patch( + "prowler.providers.openstack.services.compute.compute_instance_metadata_sensitive_data.compute_instance_metadata_sensitive_data.compute_client", + new=compute_client, + ), + mock.patch( + "prowler.providers.openstack.services.compute.compute_instance_metadata_sensitive_data.compute_instance_metadata_sensitive_data.detect_secrets_scan_batch", + return_value={ + 0: [ + { + "type": "JSON Web Token (base64url-encoded)", + "line_number": 2, + "filename": "data", + "hashed_secret": "x", + "is_verified": True, + } + ] + }, + ) as mock_scan, + ): + from prowler.providers.openstack.services.compute.compute_instance_metadata_sensitive_data.compute_instance_metadata_sensitive_data import ( + compute_instance_metadata_sensitive_data, + ) + + check = compute_instance_metadata_sensitive_data() + result = 
check.execute() + + assert len(result) == 1 + assert result[0].status == "FAIL" + assert result[0].check_metadata.Severity == Severity.critical + assert "confirmed to be live" in result[0].status_extended + assert mock_scan.call_args.kwargs.get("validate") is True diff --git a/tests/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data_test.py b/tests/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data_test.py index 96eb55caf0c..eb922742320 100644 --- a/tests/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data_test.py +++ b/tests/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data_test.py @@ -2,6 +2,7 @@ from unittest import mock +from prowler.lib.check.models import Severity from prowler.providers.openstack.services.objectstorage.objectstorage_service import ( ObjectStorageContainer, ) @@ -241,3 +242,63 @@ def test_multiple_containers_mixed(self): assert len(result) == 2 assert len([r for r in result if r.status == "PASS"]) == 1 assert len([r for r in result if r.status == "FAIL"]) == 1 + + def test_container_verified_secret_escalates_to_critical(self): + """Test that a confirmed live secret escalates the finding to CRITICAL (FAIL).""" + objectstorage_client = mock.MagicMock() + objectstorage_client.audit_config = {"secrets_validate": True} + objectstorage_client.containers = [ + ObjectStorageContainer( + id="container-verified", + name="verified-secret", + region=OPENSTACK_REGION, + project_id=OPENSTACK_PROJECT_ID, + object_count=0, + bytes_used=0, + read_ACL="", + write_ACL="", + versioning_enabled=False, + versions_location="", + history_location="", + sync_to="", + sync_key="", + metadata={"api_key": "placeholder"}, + ) 
+ ] + + with ( + mock.patch( + "prowler.providers.common.provider.Provider.get_global_provider", + return_value=set_mocked_openstack_provider(), + ), + mock.patch( + "prowler.providers.openstack.services.objectstorage.objectstorage_container_metadata_sensitive_data.objectstorage_container_metadata_sensitive_data.objectstorage_client", + new=objectstorage_client, + ), + mock.patch( + "prowler.providers.openstack.services.objectstorage.objectstorage_container_metadata_sensitive_data.objectstorage_container_metadata_sensitive_data.detect_secrets_scan_batch", + return_value={ + 0: [ + { + "type": "JSON Web Token (base64url-encoded)", + "line_number": 2, + "filename": "data", + "hashed_secret": "x", + "is_verified": True, + } + ] + }, + ) as mock_scan, + ): + from prowler.providers.openstack.services.objectstorage.objectstorage_container_metadata_sensitive_data.objectstorage_container_metadata_sensitive_data import ( + objectstorage_container_metadata_sensitive_data, + ) + + check = objectstorage_container_metadata_sensitive_data() + result = check.execute() + + assert len(result) == 1 + assert result[0].status == "FAIL" + assert result[0].check_metadata.Severity == Severity.critical + assert "confirmed to be live" in result[0].status_extended + assert mock_scan.call_args.kwargs.get("validate") is True From b13b3f0b47bd7355cc09e2ee10dba57f805cf957 Mon Sep 17 00:00:00 2001 From: Daniel Barranquero Date: Thu, 25 Jun 2026 16:04:47 +0200 Subject: [PATCH 17/20] docs(sdk): correct secret-scanning docs, changelog and OpenStack check notes --- docs/developer-guide/secret-scanning-checks.mdx | 2 +- docs/user-guide/cli/tutorials/configuration_file.mdx | 4 ++-- prowler/CHANGELOG.md | 2 +- ...lockstorage_snapshot_metadata_sensitive_data.metadata.json | 2 +- .../blockstorage_volume_metadata_sensitive_data.metadata.json | 2 +- .../compute_instance_metadata_sensitive_data.metadata.json | 2 +- ...ectstorage_container_metadata_sensitive_data.metadata.json | 2 +- 7 files changed, 8 
insertions(+), 8 deletions(-) diff --git a/docs/developer-guide/secret-scanning-checks.mdx b/docs/developer-guide/secret-scanning-checks.mdx index 79794bb5aa9..c74b0a05e76 100644 --- a/docs/developer-guide/secret-scanning-checks.mdx +++ b/docs/developer-guide/secret-scanning-checks.mdx @@ -87,7 +87,7 @@ Derive the indices from the same `list(...)` of resources in both Phase 1 and Ph ## Validation and Severity -Both helpers accept `validate`, read from `secrets_validate` in the provider configuration or the `--scan-secrets-validate` flag. When enabled, Kingfisher confirms whether each secret is live, and confirmed secrets carry `is_verified: True`. +`detect_secrets_scan_batch` accepts `validate`, read from `secrets_validate` in the provider configuration or the `--scan-secrets-validate` flag. When enabled, Kingfisher confirms whether each secret is live, and confirmed secrets carry `is_verified: True`. After marking a report as `FAIL`, pass the findings to `annotate_verified_secrets(report, findings)`. When any secret is verified, the helper escalates the finding to critical severity and appends a note that the secret was confirmed live. Validation stays off by default because it sends the discovered secret to the provider API. diff --git a/docs/user-guide/cli/tutorials/configuration_file.mdx b/docs/user-guide/cli/tutorials/configuration_file.mdx index 3aee9955d8a..8a7550de068 100644 --- a/docs/user-guide/cli/tutorials/configuration_file.mdx +++ b/docs/user-guide/cli/tutorials/configuration_file.mdx @@ -2,6 +2,8 @@ title: "Configuration File" --- +import { VersionBadge } from "/snippets/version-badge.mdx" + Several Prowler's checks have user configurable variables that can be modified in a common **configuration file**. 
This file can be found in the following [path](https://github.com/prowler-cloud/prowler/blob/master/prowler/config/config.yaml): ``` @@ -89,8 +91,6 @@ The following list includes all the AWS checks with configurable variables that ### Validating Discovered Secrets -import { VersionBadge } from "/snippets/version-badge.mdx" - By default, the secret-scanning checks run fully offline: secrets are detected but never sent anywhere. Setting `secrets_validate` to `True` additionally confirms whether each discovered secret is live by authenticating with it against the corresponding provider API. The discovered secret itself serves as the credential, so Prowler requires no additional permissions to validate it. diff --git a/prowler/CHANGELOG.md b/prowler/CHANGELOG.md index da095a16e23..d17822ebac2 100644 --- a/prowler/CHANGELOG.md +++ b/prowler/CHANGELOG.md @@ -8,7 +8,7 @@ All notable changes to the **Prowler SDK** are documented in this file. - `entra_conditional_access_policy_explicitly_targets_azure_devops` check for M365 provider, verifying at least one enabled Conditional Access policy explicitly includes the Azure DevOps cloud application instead of relying on a broad "All cloud apps" policy [(#11182)](https://github.com/prowler-cloud/prowler/pull/11182) - `entra_conditional_access_policy_no_exclusion_gaps` check for M365 provider, verifying every user, group, role, or application excluded from an enabled Conditional Access policy stays in scope of another enabled policy [(#11577)](https://github.com/prowler-cloud/prowler/pull/11577) -- `--scan-secrets-validate` flag and `secrets_validate` configuration option to optionally validate the secrets discovered by the secret-scanning checks against the provider APIs; secrets confirmed to be live are reported as critical [(#11694)](https://github.com/prowler-cloud/prowler/pull/11694) +- `--scan-secrets-validate` flag and `aws.secrets_validate` configuration option to optionally validate the secrets discovered by the 
secret-scanning checks against the provider APIs; secrets confirmed to be live are reported as critical [(#11694)](https://github.com/prowler-cloud/prowler/pull/11694) ### 🔄 Changed diff --git a/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.metadata.json b/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.metadata.json index 60043831cbd..bd83049d826 100644 --- a/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.metadata.json +++ b/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.metadata.json @@ -36,5 +36,5 @@ "RelatedTo": [ "blockstorage_volume_metadata_sensitive_data" ], - "Notes": "This check uses the detect-secrets library to scan for credentials. May produce false positives on metadata keys containing secret-like keywords. Findings should be reviewed manually. The audit_config allows configuring secrets_ignore_patterns to exclude specific patterns." + "Notes": "This check uses Kingfisher to scan for credentials. May produce false positives on metadata keys containing secret-like keywords. Findings should be reviewed manually. The audit_config allows configuring secrets_ignore_patterns to exclude specific patterns." 
} diff --git a/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.metadata.json b/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.metadata.json index cda2d8f89ef..79874db214d 100644 --- a/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.metadata.json +++ b/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.metadata.json @@ -34,5 +34,5 @@ ], "DependsOn": [], "RelatedTo": [], - "Notes": "This check uses the detect-secrets library to scan for credentials. May produce false positives on metadata keys containing secret-like keywords. Findings should be reviewed manually. The audit_config allows configuring secrets_ignore_patterns to exclude specific patterns." + "Notes": "This check uses Kingfisher to scan for credentials. May produce false positives on metadata keys containing secret-like keywords. Findings should be reviewed manually. The audit_config allows configuring secrets_ignore_patterns to exclude specific patterns." 
} diff --git a/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.metadata.json b/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.metadata.json index 89d82e7a484..c7f3e41f8e9 100644 --- a/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.metadata.json +++ b/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.metadata.json @@ -34,5 +34,5 @@ ], "DependsOn": [], "RelatedTo": [], - "Notes": "This check uses the detect-secrets library to scan for credentials. May produce false positives on metadata keys containing secret-like keywords. Findings should be reviewed manually. The audit_config allows configuring secrets_ignore_patterns to exclude specific patterns. Metadata is world-readable within instance via 169.254.169.254." + "Notes": "This check uses Kingfisher to scan for credentials. May produce false positives on metadata keys containing secret-like keywords. Findings should be reviewed manually. The audit_config allows configuring secrets_ignore_patterns to exclude specific patterns. Metadata is world-readable within instance via 169.254.169.254." 
} diff --git a/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.metadata.json b/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.metadata.json index 36055362cb0..37e7563c27c 100644 --- a/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.metadata.json +++ b/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.metadata.json @@ -35,5 +35,5 @@ ], "DependsOn": [], "RelatedTo": [], - "Notes": "This check uses the detect-secrets library to scan for credentials. May produce false positives on metadata keys containing secret-like keywords. Findings should be reviewed manually. The audit_config allows configuring secrets_ignore_patterns to exclude specific patterns." + "Notes": "This check uses Kingfisher to scan for credentials. May produce false positives on metadata keys containing secret-like keywords. Findings should be reviewed manually. The audit_config allows configuring secrets_ignore_patterns to exclude specific patterns." 
} From d9b9bd72ac93b28a4334acad0ac73d4801fd059b Mon Sep 17 00:00:00 2001 From: Daniel Barranquero Date: Thu, 25 Jun 2026 16:50:00 +0200 Subject: [PATCH 18/20] docs(sdk): add version badge and engine-history note to secret-scanning checks guide --- docs/developer-guide/secret-scanning-checks.mdx | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/developer-guide/secret-scanning-checks.mdx b/docs/developer-guide/secret-scanning-checks.mdx index c74b0a05e76..161c5a45555 100644 --- a/docs/developer-guide/secret-scanning-checks.mdx +++ b/docs/developer-guide/secret-scanning-checks.mdx @@ -2,8 +2,16 @@ title: 'Secret-Scanning Checks' --- +import { VersionBadge } from "/snippets/version-badge.mdx" + + + Prowler scans audited resources for plaintext secrets using [Kingfisher](https://github.com/mongodb/kingfisher), an open-source secret-scanning engine that Prowler invokes as a subprocess. This guide explains the structure every secret-scanning check must follow to keep scanning correct and efficient on large accounts. + +Since Prowler 5.32.0 the secret-scanning checks scan with Kingfisher. Earlier versions used the `detect-secrets` library. 
+ + ## Overview Secret detection runs through a single helper in `prowler/lib/utils/utils.py`: From faa50ee10ab11a95cc101bd923d66f68a0a39916 Mon Sep 17 00:00:00 2001 From: Daniel Barranquero Date: Fri, 26 Jun 2026 09:15:01 +0200 Subject: [PATCH 19/20] fix(sdk): apply ignore patterns to the cloudwatch multiline-event rescan --- .../cloudwatch_log_group_no_secrets_in_logs.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/prowler/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs.py b/prowler/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs.py index a790774a5cc..87b76dfbd03 100644 --- a/prowler/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs.py +++ b/prowler/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs.py @@ -86,7 +86,11 @@ def stream_payloads(): # is never enabled here: this rescan only resolves line numbers for # display and must not re-authenticate the secret. rescan_results = ( - detect_secrets_scan_batch(rescan_payloads) if rescan_payloads else {} + detect_secrets_scan_batch( + rescan_payloads, excluded_secrets=secrets_ignore_patterns + ) + if rescan_payloads + else {} ) # Phase 4: assemble one report per log group. 
From 869a08eea331707ea6007f3d722425ff0457744f Mon Sep 17 00:00:00 2001 From: Daniel Barranquero Date: Fri, 26 Jun 2026 09:15:03 +0200 Subject: [PATCH 20/20] fix(sdk): report undecodable user data as MANUAL instead of dropping the resource --- docs/developer-guide/secret-scanning-checks.mdx | 2 +- ...ling_find_secrets_ec2_launch_configuration.py | 15 ++++++++------- .../ec2_instance_secrets_user_data.py | 15 +++++++++------ .../ec2_launch_template_no_secrets.py | 16 +++++++++++++++- ...find_secrets_ec2_launch_configuration_test.py | 8 ++++++-- .../ec2_instance_secrets_user_data_test.py | 4 +++- .../ec2_launch_template_no_secrets_test.py | 6 +++--- 7 files changed, 45 insertions(+), 21 deletions(-) diff --git a/docs/developer-guide/secret-scanning-checks.mdx b/docs/developer-guide/secret-scanning-checks.mdx index 161c5a45555..5044366b7ec 100644 --- a/docs/developer-guide/secret-scanning-checks.mdx +++ b/docs/developer-guide/secret-scanning-checks.mdx @@ -34,7 +34,7 @@ Call `detect_secrets_scan_batch` once with the generator. The helper consumes it ### Phase 3: Report -Iterate the resources, look up the findings by key, and build one report per resource. +Iterate the resources, look up the findings by key, and build one report per resource. Emit a finding for **every** iterated resource — never drop one silently. When a resource's payload cannot be prepared for scanning (for example, user data that fails to base64-decode or decompress), report it as `MANUAL` with a status explaining the scan could not inspect it, rather than omitting it or claiming `PASS`. 
```python from prowler.lib.check.models import Check, Check_Report_AWS diff --git a/prowler/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration.py b/prowler/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration.py index 3956702df42..cdb7d3e8446 100644 --- a/prowler/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration.py +++ b/prowler/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration.py @@ -24,9 +24,9 @@ def execute(self): # Collect the decoded User Data of each launch configuration and scan it # all in batched Kingfisher invocations instead of one subprocess each. - # Configurations whose User Data cannot be decoded are skipped (no report), + # Configurations with undecodable User Data are reported as MANUAL, no longer # matching the original per-resource behavior. 
- skipped = set() + undecodable = set() def payloads(): for index, configuration in enumerate(configurations): @@ -44,13 +44,13 @@ def payloads(): logger.warning( f"{configuration.region} -- Unable to decode user data in autoscaling launch configuration {configuration.name}: {error}" ) - skipped.add(index) + undecodable.add(index) continue except Exception as error: logger.error( f"{configuration.region} -- {error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}" ) - skipped.add(index) + undecodable.add(index) continue yield index, user_data @@ -59,11 +59,12 @@ def payloads(): ) for index, configuration in enumerate(configurations): - if index in skipped: - continue report = Check_Report_AWS(metadata=self.metadata(), resource=configuration) - if configuration.user_data: + if index in undecodable: + report.status = "MANUAL" + report.status_extended = f"Could not decode User Data for autoscaling {configuration.name}; manual review is required to scan for secrets." + elif configuration.user_data: has_secrets = batch_results.get(index) if has_secrets: report.status = "FAIL" diff --git a/prowler/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data.py b/prowler/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data.py index 4c817d86a86..9287a7a03c4 100644 --- a/prowler/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data.py +++ b/prowler/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data.py @@ -22,9 +22,9 @@ def execute(self): # Collect the decoded User Data of each non-terminated instance and scan # it all in batched Kingfisher invocations instead of one subprocess each. - # Instances whose User Data cannot be decoded are skipped (no report), + # Instances with undecodable User Data are reported as MANUAL, no longer # matching the original per-resource behavior. 
- skipped = set() + undecodable = set() def payloads(): for index, instance in enumerate(instances): @@ -42,13 +42,13 @@ def payloads(): logger.warning( f"{instance.region} -- Unable to decode user data in EC2 instance {instance.id}: {error}" ) - skipped.add(index) + undecodable.add(index) continue except Exception as error: logger.error( f"{instance.region} -- {error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}" ) - skipped.add(index) + undecodable.add(index) continue yield index, user_data @@ -57,10 +57,13 @@ def payloads(): ) for index, instance in enumerate(instances): - if instance.state == "terminated" or index in skipped: + if instance.state == "terminated": continue report = Check_Report_AWS(metadata=self.metadata(), resource=instance) - if instance.user_data: + if index in undecodable: + report.status = "MANUAL" + report.status_extended = f"Could not decode User Data for EC2 instance {instance.id}; manual review is required to scan for secrets." + elif instance.user_data: detect_secrets_output = batch_results.get(index) if detect_secrets_output: secrets_string = ", ".join( diff --git a/prowler/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets.py b/prowler/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets.py index 13dc9744333..029f9c34a1d 100644 --- a/prowler/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets.py +++ b/prowler/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets.py @@ -20,9 +20,13 @@ def execute(self): validate = ec2_client.audit_config.get("secrets_validate", False) templates = list(ec2_client.launch_templates) + # Track versions whose User Data cannot be decoded so the template is + # surfaced (MANUAL) instead of silently claiming no secrets were found. 
+ undecodable_versions = {} + # Collect the decoded User Data of every (template, version) and scan it # all in batched Kingfisher invocations instead of one subprocess per - # version. Versions whose User Data cannot be decoded are skipped. + # version. Versions whose User Data cannot be decoded are recorded above. def payloads(): for template_index, template in enumerate(templates): for version_index, version in enumerate(template.versions): @@ -40,11 +44,17 @@ def payloads(): logger.warning( f"{template.region} -- Unable to decode User Data in EC2 Launch Template {template.name} version {version.version_number}: {error}" ) + undecodable_versions.setdefault(template_index, []).append( + version.version_number + ) continue except Exception as error: logger.error( f"{template.region} -- {error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}" ) + undecodable_versions.setdefault(template_index, []).append( + version.version_number + ) continue yield (template_index, version_index), user_data @@ -72,10 +82,14 @@ def payloads(): f"Version {version.version_number}: {secrets_string}" ) + undecodable = undecodable_versions.get(template_index, []) if len(versions_with_secrets) > 0: report.status = "FAIL" report.status_extended = f"Potential secret found in User Data for EC2 Launch Template {template.name} in template versions: {', '.join(versions_with_secrets)}." annotate_verified_secrets(report, all_secrets) + elif undecodable: + report.status = "MANUAL" + report.status_extended = f"Could not decode User Data for EC2 Launch Template {template.name} versions: {', '.join(str(version_number) for version_number in undecodable)}; manual review is required to scan for secrets." else: report.status = "PASS" report.status_extended = f"No secrets found in User Data of any version for EC2 Launch Template {template.name}." 
diff --git a/tests/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration_test.py b/tests/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration_test.py index 053e40fe2a7..ec45b89a493 100644 --- a/tests/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration_test.py +++ b/tests/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration_test.py @@ -341,7 +341,9 @@ def test_one_autoscaling_file_with_unicode_error(self): check = autoscaling_find_secrets_ec2_launch_configuration() result = check.execute() - assert len(result) == 0 + assert len(result) == 1 + assert result[0].status == "MANUAL" + assert "Could not decode User Data" in result[0].status_extended @mock_aws def test_one_autoscaling_file_invalid_gzip_error(self): @@ -381,4 +383,6 @@ def test_one_autoscaling_file_invalid_gzip_error(self): check = autoscaling_find_secrets_ec2_launch_configuration() result = check.execute() - assert len(result) == 0 + assert len(result) == 1 + assert result[0].status == "MANUAL" + assert "Could not decode User Data" in result[0].status_extended diff --git a/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data_test.py b/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data_test.py index ff6b9bc3e9e..0745302af9e 100644 --- a/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data_test.py +++ b/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data_test.py @@ -426,4 +426,6 @@ def test_one_secrets_with_unicode_error(self): check = ec2_instance_secrets_user_data() result = check.execute() - assert len(result) == 0 
+ assert len(result) == 1 + assert result[0].status == "MANUAL" + assert "Could not decode User Data" in result[0].status_extended diff --git a/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets_test.py b/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets_test.py index 4803a53993f..dab6debb6b6 100644 --- a/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets_test.py +++ b/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets_test.py @@ -670,10 +670,10 @@ def test_one_launch_template_with_unicode_error(self): result = check.execute() assert len(result) == 1 - assert result[0].status == "PASS" + assert result[0].status == "MANUAL" assert ( - result[0].status_extended - == f"No secrets found in User Data of any version for EC2 Launch Template {launch_template_name}." + f"Could not decode User Data for EC2 Launch Template {launch_template_name}" + in result[0].status_extended ) assert result[0].resource_id == launch_template_id assert result[0].region == AWS_REGION_US_EAST_1