diff --git a/docs/developer-guide/checks.mdx b/docs/developer-guide/checks.mdx index 2f0f45cd96b..5334799d362 100644 --- a/docs/developer-guide/checks.mdx +++ b/docs/developer-guide/checks.mdx @@ -445,3 +445,5 @@ The metadata structure is enforced in code using a Pydantic model. For reference ## Specific Check Patterns Details for specific providers can be found in documentation pages named using the pattern `-details`. + +Checks that scan resources for plaintext secrets follow a dedicated batched structure. Refer to [Secret-Scanning Checks](/developer-guide/secret-scanning-checks) before creating or updating one. diff --git a/docs/developer-guide/configurable-checks.mdx b/docs/developer-guide/configurable-checks.mdx index 760dc80f586..0cdbaff84b0 100644 --- a/docs/developer-guide/configurable-checks.mdx +++ b/docs/developer-guide/configurable-checks.mdx @@ -149,7 +149,6 @@ Only fields with a numeric range, a fixed value set, or a length cap are listed. | `max_days_secret_unused` | `7..365` days | | | `max_days_secret_unrotated` | `1..180` days | NIST IA-5: rotate quarterly; CIS ≤90 | | `min_kinesis_stream_retention_hours` | `24..8760` h | 1 day .. 1 year | -| `detect_secrets_plugins[].limit` | `0.0..10.0` | Shannon entropy threshold | | `shodan_api_key` | ≤512 chars | | ### Azure diff --git a/docs/developer-guide/secret-scanning-checks.mdx b/docs/developer-guide/secret-scanning-checks.mdx new file mode 100644 index 00000000000..5044366b7ec --- /dev/null +++ b/docs/developer-guide/secret-scanning-checks.mdx @@ -0,0 +1,119 @@ +--- +title: 'Secret-Scanning Checks' +--- + +import { VersionBadge } from "/snippets/version-badge.mdx" + + + +Prowler scans audited resources for plaintext secrets using [Kingfisher](https://github.com/mongodb/kingfisher), an open-source secret-scanning engine that Prowler invokes as a subprocess. This guide explains the structure every secret-scanning check must follow to keep scanning correct and efficient on large accounts. 
+ + +Since Prowler 5.32.0 the secret-scanning checks scan with Kingfisher. Earlier versions used the `detect-secrets` library. + + +## Overview + +Secret detection runs through a single helper in `prowler/lib/utils/utils.py`: + +- **`detect_secrets_scan_batch(payloads, excluded_secrets=..., validate=...)`** scans many payloads in chunked subprocess invocations and returns a `{key: [findings]}` dictionary. To scan a single payload, pass a one-entry mapping (for example, `{0: data}`). + +Every Kingfisher invocation carries a fixed process-startup cost (around 100 ms). Scanning once per resource would spawn thousands of subprocesses on large accounts (for example, thousands of CloudWatch log groups). `detect_secrets_scan_batch` amortizes that cost: it writes each payload to a temporary file as it consumes them, runs one subprocess per chunk (500 payloads by default), and maps the findings back to each payload by key. + +## The Batched Structure + +Every secret-scanning check follows three phases. + +### Phase 1: Collect + +Define a generator that yields `(key, payload)` for each scannable unit. The generator builds payload strings only — it does not call Kingfisher. Lazy yielding keeps memory and temporary-disk usage bounded to a single chunk, which matters when an account holds thousands of resources. + +### Phase 2: Batch + +Call `detect_secrets_scan_batch` once with the generator. The helper consumes it in chunks, runs Kingfisher per chunk, and returns the keys that produced findings mapped to their finding lists. + +### Phase 3: Report + +Iterate the resources, look up the findings by key, and build one report per resource. Emit a finding for **every** iterated resource — never drop one silently. When a resource's payload cannot be prepared for scanning (for example, user data that fails to base64-decode or decompress), report it as `MANUAL` with a status explaining the scan could not inspect it, rather than omitting it or claiming `PASS`. 
+ +```python +from prowler.lib.check.models import Check, Check_Report_AWS +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) +from prowler.providers.aws.services.example.example_client import example_client + + +class example_resource_no_secrets(Check): + def execute(self): + findings = [] + excluded = example_client.audit_config.get("secrets_ignore_patterns", []) + validate = example_client.audit_config.get("secrets_validate", False) + resources = list(example_client.resources) + + # Phase 1: collect — builds strings only, no scan. + def payloads(): + for index, resource in enumerate(resources): + if resource.scannable_data: + yield index, serialize(resource) + + # Phase 2: batch — one call, chunked subprocesses. + batch_results = detect_secrets_scan_batch( + payloads(), excluded_secrets=excluded, validate=validate + ) + + # Phase 3: report — look up findings by key. + for index, resource in enumerate(resources): + report = Check_Report_AWS(metadata=self.metadata(), resource=resource) + report.status = "PASS" + report.status_extended = f"No secrets found in {resource.name}." + detect_secrets_output = batch_results.get(index) + if detect_secrets_output: + report.status = "FAIL" + report.status_extended = ( + f"Potential secret found in {resource.name} -> ..." + ) + annotate_verified_secrets(report, detect_secrets_output) + findings.append(report) + + return findings +``` + +## Choosing the Key + +The key maps each finding back to its source. Two shapes cover every check: + +- **One payload per resource:** use the resource index. This fits checks that serialize a single payload per resource, such as launch configurations, CloudFormation outputs, SSM documents, Step Functions definitions, and OpenStack metadata. +- **Several payloads per resource:** use a `(resource_index, fragment)` tuple, where the fragment identifies the variable, log stream, container, file, or version. 
Phase 3 groups the per-fragment findings to build the resource report. This fits CloudWatch log streams, ECS containers, CodeBuild variables, Glue arguments, and Lambda code files. + +Derive the indices from the same `list(...)` of resources in both Phase 1 and Phase 3 so the order stays stable and the keys align. + +## Preserving Per-Payload Results + +`detect_secrets_scan_batch` runs Kingfisher with `--no-dedup`, so a secret that appears in more than one payload is reported for each one. This reproduces the result of scanning each payload individually. Build payload strings exactly as a single scan would: serialize the same data and keep line ordering, because messages often map a finding's `line_number` back to a variable name or metadata key. + +## Validation and Severity + +`detect_secrets_scan_batch` accepts `validate`, read from `secrets_validate` in the provider configuration or the `--scan-secrets-validate` flag. When enabled, Kingfisher confirms whether each secret is live, and confirmed secrets carry `is_verified: True`. + +After marking a report as `FAIL`, pass the findings to `annotate_verified_secrets(report, findings)`. When any secret is verified, the helper escalates the finding to critical severity and appends a note that the secret was confirmed live. Validation stays off by default because it sends the discovered secret to the provider API. + +## Excluded Secrets + +`detect_secrets_scan_batch` applies `secrets_ignore_patterns` — regular expressions from the provider configuration — against each finding's source line and drops the matches, mirroring single-scan behavior. + +## Testing + +To assert on the verified-secret path, mock `detect_secrets_scan_batch` in the check module and return the keyed dictionary. 
For a single resource scanned at index `0`: + +```python +mock.patch( + "prowler.providers.aws.services.example.example_resource_no_secrets.example_resource_no_secrets.detect_secrets_scan_batch", + return_value={ + 0: [{"type": "...", "line_number": 1, "is_verified": True}] + }, +) +``` + +Most tests need no mock at all: they seed resources that contain example secrets and assert on the `FAIL` status and message, which exercises the real batched path. Refer to the [Testing](/developer-guide/unit-testing) documentation for the general structure. diff --git a/docs/docs.json b/docs/docs.json index 5b88b5d2dae..b46e83eee0c 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -396,6 +396,7 @@ "developer-guide/provider", "developer-guide/services", "developer-guide/checks", + "developer-guide/secret-scanning-checks", "developer-guide/outputs", "developer-guide/integrations", "developer-guide/security-compliance-framework", diff --git a/docs/user-guide/cli/tutorials/configuration_file.mdx b/docs/user-guide/cli/tutorials/configuration_file.mdx index 657549f8b0b..8a7550de068 100644 --- a/docs/user-guide/cli/tutorials/configuration_file.mdx +++ b/docs/user-guide/cli/tutorials/configuration_file.mdx @@ -2,6 +2,8 @@ title: "Configuration File" --- +import { VersionBadge } from "/snippets/version-badge.mdx" + Several Prowler's checks have user configurable variables that can be modified in a common **configuration file**. This file can be found in the following [path](https://github.com/prowler-cloud/prowler/blob/master/prowler/config/config.yaml): ``` @@ -87,6 +89,32 @@ The following list includes all the AWS checks with configurable variables that | `opensearch_service_domains_not_publicly_accessible` | `trusted_ips` | List of Strings | +### Validating Discovered Secrets + + + +By default, the secret-scanning checks run fully offline: secrets are detected but never sent anywhere. 
Setting `secrets_validate` to `True` additionally confirms whether each discovered secret is live by authenticating with it against the corresponding provider API. The discovered secret itself serves as the credential, so Prowler requires no additional permissions to validate it. + +`secrets_validate` applies to every AWS secret-scanning check listed above (those that accept `secrets_ignore_patterns`). The `--scan-secrets-validate` CLI flag is provider-wide: it also enables validation for the secret-scanning checks of other providers, such as the OpenStack metadata checks. + +To enable validation through the configuration file, set the value under the `aws` section: + +```yaml +aws: + secrets_validate: True +``` + +To enable validation for a single scan (any provider), use Prowler CLI: + +``` +prowler aws --scan-secrets-validate +``` + + +Secret validation makes outbound network calls that authenticate with each discovered secret. The credential is exercised against the provider, so the call appears in the audited account's logs and can trigger its monitoring (for example, AWS CloudTrail records the validation request). Validation stays disabled by default so that scans remain fully offline. + + + ## Azure ### Configurable Checks diff --git a/docs/user-guide/cli/tutorials/pentesting.mdx b/docs/user-guide/cli/tutorials/pentesting.mdx index 0ad53136113..35d5b72be77 100644 --- a/docs/user-guide/cli/tutorials/pentesting.mdx +++ b/docs/user-guide/cli/tutorials/pentesting.mdx @@ -6,20 +6,33 @@ Prowler has some checks that analyse pentesting risks (Secrets, Internet Exposed ## Detect Secrets -Prowler uses `detect-secrets` library to search for any secrets that are stores in plaintext within your environment. +Prowler scans for secrets stored in plaintext within the audited environment using [Kingfisher](https://github.com/mongodb/kingfisher), an open-source secret-scanning engine. By default these scans run fully offline, so no data leaves the audited environment. 
Discovered secrets can optionally be validated against the provider APIs to confirm whether they are live — see [Validating Discovered Secrets](/user-guide/cli/tutorials/configuration_file#validating-discovered-secrets). -The actual checks that have this functionality are the following: +The checks with this functionality are the following. + +AWS: - autoscaling\_find\_secrets\_ec2\_launch\_configuration - awslambda\_function\_no\_secrets\_in\_code - awslambda\_function\_no\_secrets\_in\_variables - cloudformation\_stack\_outputs\_find\_secrets +- cloudwatch\_log\_group\_no\_secrets\_in\_logs +- codebuild\_project\_no\_secrets\_in\_variables - ec2\_instance\_secrets\_user\_data - ec2\_launch\_template\_no\_secrets - ecs\_task\_definitions\_no\_environment\_secrets +- glue\_etl\_jobs\_no\_secrets\_in\_arguments - ssm\_document\_secrets +- stepfunctions\_statemachine\_no\_secrets\_in\_definition + +OpenStack: + +- compute\_instance\_metadata\_sensitive\_data +- blockstorage\_volume\_metadata\_sensitive\_data +- blockstorage\_snapshot\_metadata\_sensitive\_data +- objectstorage\_container\_metadata\_sensitive\_data -To execute detect-secrets related checks, you can run the following command: +To execute the secret-scanning checks, run the following command: ```console prowler --categories secrets diff --git a/prowler/CHANGELOG.md b/prowler/CHANGELOG.md index 64e839646a0..86becea4c9e 100644 --- a/prowler/CHANGELOG.md +++ b/prowler/CHANGELOG.md @@ -9,6 +9,12 @@ All notable changes to the **Prowler SDK** are documented in this file. 
- `entra_conditional_access_policy_explicitly_targets_azure_devops` check for M365 provider, verifying at least one enabled Conditional Access policy explicitly includes the Azure DevOps cloud application instead of relying on a broad "All cloud apps" policy [(#11182)](https://github.com/prowler-cloud/prowler/pull/11182) - `entra_conditional_access_policy_no_exclusion_gaps` check for M365 provider, verifying every user, group, role, or application excluded from an enabled Conditional Access policy stays in scope of another enabled policy [(#11577)](https://github.com/prowler-cloud/prowler/pull/11577) - `stepfunctions_statemachine_encrypted_with_cmk` check for AWS provider, verifying that each Step Functions state machine uses a customer-managed KMS key for encryption at rest rather than the default AWS-owned key [(#11538)](https://github.com/prowler-cloud/prowler/pull/11538) +- `--scan-secrets-validate` flag and `aws.secrets_validate` configuration option to optionally validate the secrets discovered by the secret-scanning checks against the provider APIs; secrets confirmed to be live are reported as critical [(#11694)](https://github.com/prowler-cloud/prowler/pull/11694) + +### 🔄 Changed + +- Replaced the `detect-secrets` library with [Kingfisher](https://github.com/mongodb/kingfisher) as the engine for the secret-scanning checks; scans run fully offline by default and obvious placeholder values are no longer reported as findings [(#11694)](https://github.com/prowler-cloud/prowler/pull/11694) +- Removed the `detect_secrets_plugins` configuration option, which is no longer used by the new secret-scanning engine [(#11694)](https://github.com/prowler-cloud/prowler/pull/11694) --- diff --git a/prowler/config/config.yaml b/prowler/config/config.yaml index 9d41a0e5928..9d8695b8495 100644 --- a/prowler/config/config.yaml +++ b/prowler/config/config.yaml @@ -423,6 +423,13 @@ aws: # Patterns to ignore in the secrets checks secrets_ignore_patterns: [] + # Validate 
discovered secrets by checking whether they are live against the + # provider APIs. WARNING: this makes outbound network calls that authenticate + # with the discovered secret itself; the credential is exercised against the + # provider and the call will appear in the audited account's logs (and may + # trigger its monitoring). Disabled by default (scans stay fully offline). + secrets_validate: False + # AWS Secrets Manager Configuration # aws.secretsmanager_secret_unused # Maximum number of days a secret can be unused @@ -436,37 +443,6 @@ aws: # Minimum retention period in hours for Kinesis streams min_kinesis_stream_retention_hours: 168 # 7 days - # Detect Secrets plugin configuration - detect_secrets_plugins: [ - {"name": "ArtifactoryDetector"}, - {"name": "AWSKeyDetector"}, - {"name": "AzureStorageKeyDetector"}, - {"name": "BasicAuthDetector"}, - {"name": "CloudantDetector"}, - {"name": "DiscordBotTokenDetector"}, - {"name": "GitHubTokenDetector"}, - {"name": "GitLabTokenDetector"}, - {"name": "Base64HighEntropyString", "limit": 6.0}, - {"name": "HexHighEntropyString", "limit": 3.0}, - {"name": "IbmCloudIamDetector"}, - {"name": "IbmCosHmacDetector"}, - # {"name": "IPPublicDetector"}, https://github.com/Yelp/detect-secrets/pull/885 - {"name": "JwtTokenDetector"}, - {"name": "KeywordDetector"}, - {"name": "MailchimpDetector"}, - {"name": "NpmDetector"}, - {"name": "OpenAIDetector"}, - {"name": "PrivateKeyDetector"}, - {"name": "PypiTokenDetector"}, - {"name": "SendGridDetector"}, - {"name": "SlackDetector"}, - {"name": "SoftlayerDetector"}, - {"name": "SquareOAuthDetector"}, - {"name": "StripeDetector"}, - # {"name": "TelegramBotTokenDetector"}, https://github.com/Yelp/detect-secrets/pull/878 - {"name": "TwilioKeyDetector"}, - ] - # AWS CodeBuild Configuration # aws.codebuild_project_uses_allowed_github_organizations codebuild_github_allowed_organizations: diff --git a/prowler/config/schema/aws.py b/prowler/config/schema/aws.py index 1a44bb9bcc7..d15fc276c5c 
100644 --- a/prowler/config/schema/aws.py +++ b/prowler/config/schema/aws.py @@ -101,29 +101,6 @@ def _validate_account_ids(v: Optional[list[str]]) -> Optional[list[str]]: return v -# ---- Nested models ---------------------------------------------------------- - - -class _DetectSecretsPlugin(ProviderConfigBase): - """One entry inside ``detect_secrets_plugins``. - - Only ``name`` is required by the upstream library. ``limit`` is used by - the entropy detectors. Any other plugin-specific kwarg is preserved by - the ``extra="allow"`` policy inherited from ProviderConfigBase. - """ - - name: str - limit: Optional[float] = Field( - default=None, - ge=0.0, - le=10.0, - description=( - "Entropy threshold for detect-secrets entropy plugins. Range: 0..10 " - "(Shannon entropy is bounded by log2(256)=8; >10 is meaningless)." - ), - ) - - # ---- Main schema ------------------------------------------------------------ @@ -394,6 +371,14 @@ class AWSProviderConfig(ProviderConfigBase): # --- Secrets --------------------------------------------------------- secrets_ignore_patterns: Optional[list[str]] = None + secrets_validate: Optional[bool] = Field( + default=None, + description=( + "Validate discovered secrets against the provider APIs (live check). " + "Makes outbound network calls that authenticate with the discovered " + "secret. Disabled by default." + ), + ) max_days_secret_unused: Optional[int] = Field( default=None, ge=7, @@ -417,6 +402,3 @@ class AWSProviderConfig(ProviderConfigBase): le=8760, description="Hours of Kinesis stream retention. Range: 24..8760 (1 day .. 
1 year).", ) - - # --- detect-secrets plugin list ------------------------------------- - detect_secrets_plugins: Optional[list[_DetectSecretsPlugin]] = None diff --git a/prowler/lib/cli/parser.py b/prowler/lib/cli/parser.py index b65a702fbca..67a7ea28246 100644 --- a/prowler/lib/cli/parser.py +++ b/prowler/lib/cli/parser.py @@ -473,6 +473,18 @@ def __init_config_parser__(self): default=default_fixer_config_file_path, help="Set configuration fixer file path", ) + config_parser.add_argument( + "--scan-secrets-validate", + action="store_true", + default=False, + help=( + "Validate secrets discovered by the secrets checks by checking " + "whether they are live against the provider APIs. WARNING: this " + "makes outbound network calls using the discovered secret itself; " + "the credential is exercised against the provider and the call " + "appears in the audited account's logs. Disabled by default." + ), + ) def __init_custom_checks_metadata_parser__(self): # CustomChecksMetadata diff --git a/prowler/lib/utils/utils.py b/prowler/lib/utils/utils.py index c4b29f6cc10..6d5db87c7d4 100644 --- a/prowler/lib/utils/utils.py +++ b/prowler/lib/utils/utils.py @@ -9,52 +9,105 @@ pass import re +import shutil +import subprocess import sys import tempfile from datetime import datetime -from hashlib import sha512 +from functools import lru_cache +from hashlib import sha1, sha512 from io import TextIOWrapper from ipaddress import ip_address from os.path import exists from time import mktime -from typing import Any, Optional +from typing import Any, Iterable, Mapping, Optional, Union from colorama import Style -from detect_secrets import SecretsCollection -from detect_secrets.settings import transient_settings from prowler.config.config import encoding_format_utf_8 from prowler.lib.logger import logger -default_detect_secrets_plugins = [ - {"name": "ArtifactoryDetector"}, - {"name": "AWSKeyDetector"}, - {"name": "AzureStorageKeyDetector"}, - {"name": "BasicAuthDetector"}, - {"name": 
"CloudantDetector"}, - {"name": "DiscordBotTokenDetector"}, - {"name": "GitHubTokenDetector"}, - {"name": "GitLabTokenDetector"}, - {"name": "Base64HighEntropyString", "limit": 6.0}, - {"name": "HexHighEntropyString", "limit": 3.0}, - {"name": "IbmCloudIamDetector"}, - {"name": "IbmCosHmacDetector"}, - # {"name": "IPPublicDetector"}, https://github.com/Yelp/detect-secrets/pull/885 - {"name": "JwtTokenDetector"}, - {"name": "KeywordDetector"}, - {"name": "MailchimpDetector"}, - {"name": "NpmDetector"}, - {"name": "OpenAIDetector"}, - {"name": "PrivateKeyDetector"}, - {"name": "PypiTokenDetector"}, - {"name": "SendGridDetector"}, - {"name": "SlackDetector"}, - {"name": "SoftlayerDetector"}, - {"name": "SquareOAuthDetector"}, - {"name": "StripeDetector"}, - # {"name": "TelegramBotTokenDetector"}, https://github.com/Yelp/detect-secrets/pull/878 - {"name": "TwilioKeyDetector"}, -] +# Default minimum confidence level for reporting findings. "low" is required to +# enable Kingfisher's built-in generic rules (Generic Password / Secret / API +# Key), which preserve the keyword-based coverage Prowler had with +# detect-secrets' KeywordDetector; at "medium" those generic rules do not fire. +# Possible values: "low", "medium", "high". +default_secrets_confidence = "low" + +# Kingfisher exit codes considered successful: 0 (no findings), 200 (findings), +# 205 (validated findings). +_kingfisher_success_exit_codes = (0, 200, 205) + +# Number of payloads scanned per Kingfisher invocation in batch mode. Bounds +# peak temp-disk and memory while still amortizing the per-process spawn cost +# across many fragments (see detect_secrets_scan_batch). +default_secrets_batch_chunk_size = 500 + +# Wall-clock cap (seconds) for a single Kingfisher subprocess, so a hung binary +# cannot block the audit indefinitely. 
+default_secrets_scan_timeout = 300 + + +@lru_cache(maxsize=1) +def get_kingfisher_binary() -> str: + """Return the path to the bundled Kingfisher binary (cached).""" + from kingfisher import get_binary_path + + return get_binary_path() + + +def _build_kingfisher_command( + scan_paths: list, + output_path: str, + confidence: str, + validate: bool, + no_dedup: bool = False, +) -> list: + """Build the Kingfisher ``scan`` command shared by single and batch scans.""" + command = [ + get_kingfisher_binary(), + "scan", + *scan_paths, + "--format", + "json", + "--output", + output_path, + "--no-update-check", + "--confidence", + confidence, + ] + if validate: + # Live-validate discovered secrets against provider APIs. Use + # conservative defaults (short timeout, no retries) to limit the blast + # radius of the outbound calls. + command += ["--validation-timeout", "5", "--validation-retries", "0"] + else: + command.append("--no-validate") + if no_dedup: + # Report every occurrence (one per file) so batched results match + # scanning each payload individually. + command.append("--no-dedup") + return command + + +def _finding_to_dict(entry: dict, fallback_filename: str) -> dict: + """Convert a Kingfisher finding entry into Prowler's finding dict shape.""" + rule = entry.get("rule", {}) + finding = entry.get("finding", {}) + snippet = finding.get("snippet", "") or "" + return { + "filename": finding.get("path", fallback_filename), + "line_number": finding.get("line"), + "type": rule.get("name"), + # Non-security identifier for the matched secret (matches the + # detect-secrets output shape); not used for security. 
+ "hashed_secret": ( + sha1(snippet.encode(), usedforsecurity=False).hexdigest() + if snippet + else None + ), + "is_verified": finding.get("validation", {}).get("status") == "Active", + } def open_file(input_file: str, mode: str = "r") -> TextIOWrapper: @@ -111,77 +164,153 @@ def hash_sha512(string: str) -> str: return sha512(string.encode(encoding_format_utf_8)).hexdigest()[0:9] -def detect_secrets_scan( - data: str = None, - file=None, - excluded_secrets: list[str] = None, - detect_secrets_plugins: dict = None, -) -> list[dict[str, str]]: - """detect_secrets_scan scans the data or file for secrets using the detect-secrets library. - Args: - data (str): The data to scan for secrets. - file (str): The file to scan for secrets. - excluded_secrets (list): A list of regex patterns to exclude from the scan. - detect_secrets_plugins (dict): The settings to use for the scan. - Returns: - dict: The secrets found in the - Raises: - Exception: If an error occurs during the scan. - Examples: - >>> detect_secrets_scan(data="password=password") - [{'filename': 'data', 'hashed_secret': 'f7c3bc1d808e04732adf679965ccc34ca7ae3441', 'is_verified': False, 'line_number': 1, 'type': 'Secret Keyword'}] - >>> detect_secrets_scan(file="file.txt") - {'file.txt': [{'filename': 'file.txt', 'hashed_secret': 'f7c3bc1d808e04732adf679965ccc34ca7ae3441', 'is_verified': False, 'line_number': 1, 'type': 'Secret Keyword'}]} +def _scan_batch_chunk( + chunk: list, + excluded_secrets: list, + confidence: str, + validate: bool, + results: dict, +) -> None: + """Scan one chunk of ``(key, data)`` payloads in a single Kingfisher call. + + Writes each payload to its own file in a temp directory, scans the whole + directory once (``--no-dedup`` so per-file results match individual scans), + maps findings back to their key by file path, and appends them to + ``results``. The temp directory is always removed. 
""" + if not chunk: + return + tmp_dir = tempfile.mkdtemp() + temp_output_file = None try: - if not file: - temp_data_file = tempfile.NamedTemporaryFile(delete=False) - temp_data_file.write(bytes(data, encoding="raw_unicode_escape")) - temp_data_file.close() - - secrets = SecretsCollection() - - if not detect_secrets_plugins: - detect_secrets_plugins = default_detect_secrets_plugins - - settings = { - "plugins_used": detect_secrets_plugins, - "filters_used": [ - {"path": "detect_secrets.filters.common.is_invalid_file"}, - {"path": "detect_secrets.filters.common.is_known_false_positive"}, - {"path": "detect_secrets.filters.heuristic.is_likely_id_string"}, - {"path": "detect_secrets.filters.heuristic.is_potential_secret"}, - ], - } - - if excluded_secrets and len(excluded_secrets) > 0: - settings["filters_used"].append( - { - "path": "detect_secrets.filters.regex.should_exclude_line", - "pattern": excluded_secrets, - } + index_to_key = {} + for index, (key, data) in enumerate(chunk): + content = data if data.endswith("\n") else data + "\n" + name = str(index) + with open(os.path.join(tmp_dir, name), "wb") as fh: + fh.write(bytes(content, encoding="raw_unicode_escape")) + index_to_key[name] = key + + temp_output_file = tempfile.NamedTemporaryFile(delete=False, suffix=".json") + temp_output_file.close() + command = _build_kingfisher_command( + [tmp_dir], temp_output_file.name, confidence, validate, no_dedup=True + ) + process = subprocess.run( + command, + capture_output=True, + text=True, + timeout=default_secrets_scan_timeout, + ) + if process.returncode not in _kingfisher_success_exit_codes: + logger.error( + f"Error scanning for secrets: Kingfisher exited with code " + f"{process.returncode}: {process.stderr.strip()[:500]}" ) - with transient_settings(settings): - if file: - secrets.scan_file(file) - else: - secrets.scan_file(temp_data_file.name) - - if not file: - os.remove(temp_data_file.name) - - detect_secrets_output = secrets.json() - - if 
detect_secrets_output: - if file: - return detect_secrets_output[file] - else: - return detect_secrets_output[temp_data_file.name] - else: - return None + return + + with open(temp_output_file.name, encoding=encoding_format_utf_8) as f: + output = f.read() + kingfisher_output = json.loads(output) if output.strip() else {} + + source_lines_cache = {} + for entry in kingfisher_output.get("findings", []): + finding = entry.get("finding", {}) + name = os.path.basename(finding.get("path", "")) + key = index_to_key.get(name) + if key is None: + continue + line_number = finding.get("line") + if excluded_secrets and line_number: + if name not in source_lines_cache: + with open( + os.path.join(tmp_dir, name), + encoding=encoding_format_utf_8, + errors="replace", + ) as f: + source_lines_cache[name] = f.read().splitlines() + lines = source_lines_cache[name] + if line_number <= len(lines) and any( + re.search(pattern, lines[line_number - 1]) + for pattern in excluded_secrets + ): + continue + results.setdefault(key, []).append(_finding_to_dict(entry, name)) except Exception as e: logger.error(f"Error scanning for secrets: {e}") - return None + finally: + if temp_output_file and os.path.exists(temp_output_file.name): + os.remove(temp_output_file.name) + shutil.rmtree(tmp_dir, ignore_errors=True) + + +def detect_secrets_scan_batch( + payloads: Union[Mapping[Any, str], Iterable[tuple[Any, str]]], + excluded_secrets: Optional[list[str]] = None, + confidence: str = default_secrets_confidence, + validate: bool = False, + chunk_size: int = default_secrets_batch_chunk_size, +) -> dict: + """Scan many payloads with Kingfisher in chunked subprocess invocations. + + This is the scan entry point used by every secret check. Each payload is + written to its own file and scanned with ``--no-dedup`` so per-payload + results match scanning each payload on its own. 
Payloads are processed in + chunks (writing each to disk and releasing it as it is consumed) to bound + peak temp-disk and memory use while amortizing the per-process spawn cost + across many fragments. + + By default the scan runs fully offline (``--no-validate``, + ``--no-update-check``): no network calls are made, so the scanned data is + never sent anywhere. When ``validate`` is True, Kingfisher additionally + checks whether each discovered secret is live by authenticating with it + against the provider's API (the secret itself is the credential; no extra + permissions are required). That makes outbound network calls, so it must be + explicitly opted in. + + Args: + payloads: a mapping ``{key: data}`` or any iterable of ``(key, data)`` + pairs. ``key`` is any hashable the caller uses to map findings back + to its source (e.g. a variable name or a ``(resource, stream)``). + excluded_secrets (list): regex patterns; a finding whose source line + matches one is excluded. + confidence (str): minimum Kingfisher confidence ("low"/"medium"/"high"). + validate (bool): live-validate discovered secrets (outbound calls). + chunk_size (int): payloads scanned per Kingfisher invocation. + Returns: + dict mapping each key that produced findings to its list of finding + dicts, each with ``filename``, ``line_number``, ``type``, + ``hashed_secret`` and ``is_verified`` keys. Keys with no findings are + omitted. + """ + items = payloads.items() if hasattr(payloads, "items") else payloads + results = {} + chunk = [] + for key, data in items: + chunk.append((key, data)) + if len(chunk) >= chunk_size: + _scan_batch_chunk(chunk, excluded_secrets, confidence, validate, results) + chunk = [] + _scan_batch_chunk(chunk, excluded_secrets, confidence, validate, results) + return results + + +def annotate_verified_secrets(report, secrets: list) -> None: + """Escalate and annotate a finding when any of its secrets is confirmed live. 
+ + When secret validation (``--scan-secrets-validate`` / ``secrets_validate``) + confirms that a discovered secret is live, the finding is more severe than a + potential secret: its severity is raised to critical and a note is appended + to ``status_extended``. No-op when no secret was validated as live, so the + default offline behavior (and existing finding messages) is unchanged. + """ + if secrets and any(secret.get("is_verified") for secret in secrets): + from prowler.lib.check.models import Severity + + report.check_metadata.Severity = Severity.critical + report.status_extended += ( + " One or more of these secrets were confirmed to be live." + ) def validate_ip_address(ip_string): diff --git a/prowler/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration.py b/prowler/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration.py index 6595b710852..cdb7d3e8446 100644 --- a/prowler/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration.py +++ b/prowler/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration.py @@ -4,7 +4,10 @@ from prowler.config.config import encoding_format_utf_8 from prowler.lib.check.models import Check, Check_Report_AWS from prowler.lib.logger import logger -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.aws.services.autoscaling.autoscaling_client import ( autoscaling_client, ) @@ -16,13 +19,19 @@ def execute(self): secrets_ignore_patterns = autoscaling_client.audit_config.get( "secrets_ignore_patterns", [] ) - for ( - configuration_arn, - configuration, - ) in 
autoscaling_client.launch_configurations.items(): - report = Check_Report_AWS(metadata=self.metadata(), resource=configuration) + validate = autoscaling_client.audit_config.get("secrets_validate", False) + configurations = list(autoscaling_client.launch_configurations.values()) - if configuration.user_data: + # Collect the decoded User Data of each launch configuration and scan it + # all in batched Kingfisher invocations instead of one subprocess each. + # Configurations whose User Data cannot be decoded are recorded in `undecodable` + # and reported as MANUAL below instead of being silently omitted. + undecodable = set() + + def payloads(): + for index, configuration in enumerate(configurations): + if not configuration.user_data: + continue user_data = b64decode(configuration.user_data) try: if user_data[0:2] == b"\x1f\x8b": # GZIP magic number @@ -35,24 +44,32 @@ def execute(self): logger.warning( f"{configuration.region} -- Unable to decode user data in autoscaling launch configuration {configuration.name}: {error}" ) + undecodable.add(index) continue except Exception as error: logger.error( f"{configuration.region} -- {error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}" ) + undecodable.add(index) continue + yield index, user_data - has_secrets = detect_secrets_scan( - data=user_data, - excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=autoscaling_client.audit_config.get( - "detect_secrets_plugins" - ), - ) + batch_results = detect_secrets_scan_batch( + payloads(), excluded_secrets=secrets_ignore_patterns, validate=validate + ) + + for index, configuration in enumerate(configurations): + report = Check_Report_AWS(metadata=self.metadata(), resource=configuration) + if index in undecodable: + report.status = "MANUAL" + report.status_extended = f"Could not decode User Data for autoscaling {configuration.name}; manual review is required to scan for secrets." 
+ elif configuration.user_data: + has_secrets = batch_results.get(index) if has_secrets: report.status = "FAIL" report.status_extended = f"Potential secret found in autoscaling {configuration.name} User Data." + annotate_verified_secrets(report, has_secrets) else: report.status = "PASS" report.status_extended = f"No secrets found in autoscaling {configuration.name} User Data." diff --git a/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_code/awslambda_function_no_secrets_in_code.py b/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_code/awslambda_function_no_secrets_in_code.py index 51c56a20112..21aea8dd231 100644 --- a/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_code/awslambda_function_no_secrets_in_code.py +++ b/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_code/awslambda_function_no_secrets_in_code.py @@ -1,65 +1,86 @@ import os import tempfile +from collections import defaultdict from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.aws.services.awslambda.awslambda_client import awslambda_client class awslambda_function_no_secrets_in_code(Check): def execute(self): findings = [] - if awslambda_client.functions: - secrets_ignore_patterns = awslambda_client.audit_config.get( - "secrets_ignore_patterns", [] - ) + if not awslambda_client.functions: + return findings + + secrets_ignore_patterns = awslambda_client.audit_config.get( + "secrets_ignore_patterns", [] + ) + validate = awslambda_client.audit_config.get("secrets_validate", False) + + # Scan the top-level files of every function's package in batched + # Kingfisher invocations instead of one subprocess per file per function. 
+ # Each package is extracted one at a time and its top-level files are + # read (byte-faithfully via latin-1) before the extraction is released, + # so only a single package is on disk at a time. Findings are keyed by + # (function index, file name) so they can be grouped back per function. + functions_with_code = [] + + def code_payloads(): for function, function_code in awslambda_client._get_function_code(): - if function_code: - report = Check_Report_AWS( - metadata=self.metadata(), resource=function - ) + if not function_code: + continue + index = len(functions_with_code) + functions_with_code.append(function) + with tempfile.TemporaryDirectory() as tmp_dir_name: + function_code.code_zip.extractall(tmp_dir_name) + for file_name in next(os.walk(tmp_dir_name))[2]: + try: + with open( + os.path.join(tmp_dir_name, file_name), "rb" + ) as code_file: + content = code_file.read().decode("latin-1") + except Exception: + continue + yield (index, file_name), content + + batch_results = detect_secrets_scan_batch( + code_payloads(), + excluded_secrets=secrets_ignore_patterns, + validate=validate, + ) + + findings_by_function = defaultdict(dict) + for (index, file_name), file_findings in batch_results.items(): + findings_by_function[index][file_name] = file_findings + + for index, function in enumerate(functions_with_code): + report = Check_Report_AWS(metadata=self.metadata(), resource=function) + report.status = "PASS" + report.status_extended = ( + f"No secrets found in Lambda function {function.name} code." + ) - report.status = "PASS" - report.status_extended = ( - f"No secrets found in Lambda function {function.name} code." 
+ files_with_secrets = findings_by_function.get(index) + if files_with_secrets: + all_secrets = [] + secrets_findings = [] + for file_name, file_findings in files_with_secrets.items(): + all_secrets.extend(file_findings) + secrets_string = ", ".join( + f"{secret['type']} on line {secret['line_number']}" + for secret in file_findings ) - with tempfile.TemporaryDirectory() as tmp_dir_name: - function_code.code_zip.extractall(tmp_dir_name) - # List all files - files_in_zip = next(os.walk(tmp_dir_name))[2] - secrets_findings = [] - for file in files_in_zip: - detect_secrets_output = detect_secrets_scan( - file=f"{tmp_dir_name}/{file}", - excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=awslambda_client.audit_config.get( - "detect_secrets_plugins", - ), - ) - if detect_secrets_output: - for ( - secret - ) in ( - detect_secrets_output - ): # Appears that only 1 file is being scanned at a time, so could rework this - output_file_name = secret["filename"].replace( - f"{tmp_dir_name}/", "" - ) - secrets_string = ", ".join( - [ - f"{secret['type']} on line {secret['line_number']}" - for secret in detect_secrets_output - ] - ) - secrets_findings.append( - f"{output_file_name}: {secrets_string}" - ) + secrets_findings.append(f"{file_name}: {secrets_string}") - if secrets_findings: - final_output_string = "; ".join(secrets_findings) - report.status = "FAIL" - report.status_extended = f"Potential {'secrets' if len(secrets_findings) > 1 else 'secret'} found in Lambda function {function.name} code -> {final_output_string}." + final_output_string = "; ".join(secrets_findings) + report.status = "FAIL" + report.status_extended = f"Potential {'secrets' if len(secrets_findings) > 1 else 'secret'} found in Lambda function {function.name} code -> {final_output_string}." 
+ annotate_verified_secrets(report, all_secrets) - findings.append(report) + findings.append(report) return findings diff --git a/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables.py b/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables.py index 9448b0239f8..bfa4f07dca1 100644 --- a/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables.py +++ b/prowler/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables.py @@ -1,7 +1,10 @@ import json from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.aws.services.awslambda.awslambda_client import awslambda_client @@ -11,7 +14,25 @@ def execute(self): secrets_ignore_patterns = awslambda_client.audit_config.get( "secrets_ignore_patterns", [] ) - for function in awslambda_client.functions.values(): + validate = awslambda_client.audit_config.get("secrets_validate", False) + functions = list(awslambda_client.functions.values()) + + # Scan every function's environment variables in batched Kingfisher + # invocations instead of one subprocess per function. Payloads are + # yielded lazily so only a chunk is held/written at a time, which matters + # for accounts with very large numbers of Lambda functions. 
+ def environment_payloads(): + for index, function in enumerate(functions): + if function.environment: + yield index, json.dumps(function.environment, indent=2) + + batch_results = detect_secrets_scan_batch( + environment_payloads(), + excluded_secrets=secrets_ignore_patterns, + validate=validate, + ) + + for index, function in enumerate(functions): report = Check_Report_AWS(metadata=self.metadata(), resource=function) report.status = "PASS" @@ -20,17 +41,9 @@ def execute(self): ) if function.environment: - detect_secrets_output = detect_secrets_scan( - data=json.dumps(function.environment, indent=2), - excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=awslambda_client.audit_config.get( - "detect_secrets_plugins", - ), - ) - original_env_vars = [] - for name, value in function.environment.items(): - original_env_vars.append(name) + detect_secrets_output = batch_results.get(index) if detect_secrets_output: + original_env_vars = list(function.environment.keys()) secrets_string = ", ".join( [ f"{secret['type']} in variable {original_env_vars[secret['line_number'] - 2]}" @@ -39,6 +52,7 @@ def execute(self): ) report.status = "FAIL" report.status_extended = f"Potential secret found in Lambda function {function.name} variables -> {secrets_string}." 
+ annotate_verified_secrets(report, detect_secrets_output) findings.append(report) diff --git a/prowler/providers/aws/services/cloudformation/cloudformation_stack_outputs_find_secrets/cloudformation_stack_outputs_find_secrets.py b/prowler/providers/aws/services/cloudformation/cloudformation_stack_outputs_find_secrets/cloudformation_stack_outputs_find_secrets.py index f9b47932bbe..d545c8cbe74 100644 --- a/prowler/providers/aws/services/cloudformation/cloudformation_stack_outputs_find_secrets/cloudformation_stack_outputs_find_secrets.py +++ b/prowler/providers/aws/services/cloudformation/cloudformation_stack_outputs_find_secrets/cloudformation_stack_outputs_find_secrets.py @@ -1,5 +1,8 @@ from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.aws.services.cloudformation.cloudformation_client import ( cloudformation_client, ) @@ -14,26 +17,28 @@ def execute(self): secrets_ignore_patterns = cloudformation_client.audit_config.get( "secrets_ignore_patterns", [] ) - for stack in cloudformation_client.stacks: + validate = cloudformation_client.audit_config.get("secrets_validate", False) + stacks = list(cloudformation_client.stacks) + + # Collect one payload per stack (its Outputs) and scan them all in + # batched Kingfisher invocations instead of one subprocess per stack. + def payloads(): + for index, stack in enumerate(stacks): + if stack.outputs: + yield index, "".join(f"{output}\n" for output in stack.outputs) + + batch_results = detect_secrets_scan_batch( + payloads(), excluded_secrets=secrets_ignore_patterns, validate=validate + ) + + for index, stack in enumerate(stacks): report = Check_Report_AWS(metadata=self.metadata(), resource=stack) report.status = "PASS" report.status_extended = ( f"No secrets found in CloudFormation Stack {stack.name} Outputs." 
) if stack.outputs: - data = "" - # Store the CloudFormation Stack Outputs into a file - for output in stack.outputs: - data += f"{output}\n" - - detect_secrets_output = detect_secrets_scan( - data=data, - excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=cloudformation_client.audit_config.get( - "detect_secrets_plugins", - ), - ) - # If secrets are found, update the report status + detect_secrets_output = batch_results.get(index) if detect_secrets_output: secrets_string = ", ".join( [ @@ -43,7 +48,7 @@ def execute(self): ) report.status = "FAIL" report.status_extended = f"Potential secret found in CloudFormation Stack {stack.name} Outputs -> {secrets_string}." - + annotate_verified_secrets(report, detect_secrets_output) else: report.status = "PASS" report.status_extended = ( diff --git a/prowler/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs.py b/prowler/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs.py index 5154a5acb75..87b76dfbd03 100644 --- a/prowler/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs.py +++ b/prowler/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs.py @@ -1,7 +1,10 @@ from json import dumps, loads from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.aws.services.cloudwatch.cloudwatch_service import ( convert_to_cloudwatch_timestamp_format, ) @@ -11,95 +14,130 @@ class cloudwatch_log_group_no_secrets_in_logs(Check): def execute(self): findings = [] - if logs_client.log_groups: - secrets_ignore_patterns = logs_client.audit_config.get( - "secrets_ignore_patterns", [] - ) + if not 
logs_client.log_groups: + return findings + + secrets_ignore_patterns = logs_client.audit_config.get( + "secrets_ignore_patterns", [] + ) + validate = logs_client.audit_config.get("secrets_validate", False) + + # Phase 1: batch-scan every (log group, log stream). Payloads are yielded + # lazily so only a chunk is written/held at a time, which matters for + # accounts with very large numbers of log groups/streams. + def stream_payloads(): for log_group in logs_client.log_groups.values(): - report = Check_Report_AWS(metadata=self.metadata(), resource=log_group) - report.status = "PASS" - report.status_extended = ( - f"No secrets found in {log_group.name} log group." - ) - log_group_secrets = [] - if log_group.log_streams: - for log_stream_name in log_group.log_streams: - log_stream_secrets = {} - log_stream_data = "\n".join( - [ - dumps(event["message"]) - for event in log_group.log_streams[log_stream_name] - ] - ) - log_stream_secrets_output = detect_secrets_scan( - data=log_stream_data, - excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=logs_client.audit_config.get( - "detect_secrets_plugins", - ), + if not log_group.log_streams: + continue + for log_stream_name, events in log_group.log_streams.items(): + yield ( + (log_group.name, log_stream_name), + "\n".join(dumps(event["message"]) for event in events), + ) + + stream_results = detect_secrets_scan_batch( + stream_payloads(), + excluded_secrets=secrets_ignore_patterns, + validate=validate, + ) + + # Phase 2: plan the per-timestamp secrets for each flagged stream and + # collect the multiline events to rescan. Each multiline event is + # rescanned once (keyed by timestamp) to resolve per-line detail; the + # rescans are batched in Phase 3 instead of one subprocess per event. 
+ stream_plans = {} # (group, stream) -> {timestamp: {"multiline", "types"}} + rescan_payloads = {} # (group, stream, timestamp) -> multiline event data + for log_group in logs_client.log_groups.values(): + for log_stream_name in log_group.log_streams or {}: + stream_secrets = stream_results.get((log_group.name, log_stream_name)) + if not stream_secrets: + continue + events = log_group.log_streams[log_stream_name] + plan = {} + for secret in stream_secrets: + flagged_event = events[secret["line_number"] - 1] + cloudwatch_timestamp = convert_to_cloudwatch_timestamp_format( + flagged_event["timestamp"] + ) + try: + log_event_data = dumps( + loads(flagged_event["message"]), indent=2 ) + except Exception: + log_event_data = dumps(flagged_event["message"], indent=2) + multiline = len(log_event_data.split("\n")) > 1 + if cloudwatch_timestamp not in plan: + plan[cloudwatch_timestamp] = { + "multiline": multiline, + "types": [], + } + if multiline: + # More informative output is possible with more than one + # line: the event is rescanned to get the type and line + # number of each secret. + rescan_payloads[ + (log_group.name, log_stream_name, cloudwatch_timestamp) + ] = log_event_data + else: + plan[cloudwatch_timestamp]["types"].append(secret["type"]) + stream_plans[(log_group.name, log_stream_name)] = plan - if log_stream_secrets_output: - for secret in log_stream_secrets_output: - flagged_event = log_group.log_streams[log_stream_name][ - secret["line_number"] - 1 - ] - cloudwatch_timestamp = ( - convert_to_cloudwatch_timestamp_format( - flagged_event["timestamp"] - ) - ) - if ( - cloudwatch_timestamp - not in log_stream_secrets.keys() - ): - log_stream_secrets[cloudwatch_timestamp] = ( - SecretsDict() - ) + # Phase 3: one batched rescan for all multiline flagged events. Validation + # is never enabled here: this rescan only resolves line numbers for + # display and must not re-authenticate the secret. 
+ rescan_results = ( + detect_secrets_scan_batch( + rescan_payloads, excluded_secrets=secrets_ignore_patterns + ) + if rescan_payloads + else {} + ) - try: - log_event_data = dumps( - loads(flagged_event["message"]), indent=2 - ) - except Exception: - log_event_data = dumps( - flagged_event["message"], indent=2 - ) - if len(log_event_data.split("\n")) > 1: - # Can get more informative output if there is more than 1 line. - # Will rescan just this event to get the type of secret and the line number - event_detect_secrets_output = detect_secrets_scan( - data=log_event_data, - detect_secrets_plugins=logs_client.audit_config.get( - "detect_secrets_plugins" - ), - ) - if event_detect_secrets_output: - for secret in event_detect_secrets_output: - log_stream_secrets[ - cloudwatch_timestamp - ].add_secret( - secret["line_number"], secret["type"] - ) - else: - log_stream_secrets[cloudwatch_timestamp].add_secret( - 1, secret["type"] - ) - if log_stream_secrets: - secrets_string = "; ".join( - [ - f"at {timestamp} - {log_stream_secrets[timestamp].to_string()}" - for timestamp in log_stream_secrets - ] - ) - log_group_secrets.append( - f"in log stream {log_stream_name} {secrets_string}" + # Phase 4: assemble one report per log group. + for log_group in logs_client.log_groups.values(): + report = Check_Report_AWS(metadata=self.metadata(), resource=log_group) + report.status = "PASS" + report.status_extended = f"No secrets found in {log_group.name} log group." 
+ log_group_secrets = [] + all_secrets = [] + for log_stream_name in log_group.log_streams or {}: + stream_secrets = stream_results.get((log_group.name, log_stream_name)) + if not stream_secrets: + continue + all_secrets.extend(stream_secrets) + log_stream_secrets = {} + for cloudwatch_timestamp, entry in stream_plans[ + (log_group.name, log_stream_name) + ].items(): + secrets_dict = SecretsDict() + if entry["multiline"]: + for event_secret in rescan_results.get( + (log_group.name, log_stream_name, cloudwatch_timestamp), + [], + ): + secrets_dict.add_secret( + event_secret["line_number"], event_secret["type"] ) - if log_group_secrets: - secrets_string = "; ".join(log_group_secrets) - report.status = "FAIL" - report.status_extended = f"Potential secrets found in log group {log_group.name} {secrets_string}." - findings.append(report) + else: + for secret_type in entry["types"]: + secrets_dict.add_secret(1, secret_type) + log_stream_secrets[cloudwatch_timestamp] = secrets_dict + if log_stream_secrets: + secrets_string = "; ".join( + [ + f"at {timestamp} - {log_stream_secrets[timestamp].to_string()}" + for timestamp in log_stream_secrets + ] + ) + log_group_secrets.append( + f"in log stream {log_stream_name} {secrets_string}" + ) + if log_group_secrets: + secrets_string = "; ".join(log_group_secrets) + report.status = "FAIL" + report.status_extended = f"Potential secrets found in log group {log_group.name} {secrets_string}." 
+ annotate_verified_secrets(report, all_secrets) + findings.append(report) return findings diff --git a/prowler/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables.py b/prowler/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables.py index 8d031cc25ad..1114ce76ae7 100644 --- a/prowler/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables.py +++ b/prowler/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables.py @@ -1,7 +1,10 @@ import json from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.aws.services.codebuild.codebuild_client import codebuild_client @@ -14,35 +17,53 @@ def execute(self): secrets_ignore_patterns = codebuild_client.audit_config.get( "secrets_ignore_patterns", [] ) - for project in codebuild_client.projects.values(): + validate = codebuild_client.audit_config.get("secrets_validate", False) + projects = list(codebuild_client.projects.values()) + + # Collect every scannable plaintext variable across all projects and scan + # them in batched Kingfisher invocations instead of one subprocess per + # variable. Findings are keyed by (project index, variable index). 
+ def payloads(): + for project_index, project in enumerate(projects): + if project.environment_variables: + for var_index, env_var in enumerate(project.environment_variables): + if ( + env_var.type == "PLAINTEXT" + and env_var.name not in sensitive_vars_excluded + ): + yield (project_index, var_index), json.dumps( + {env_var.name: env_var.value} + ) + + batch_results = detect_secrets_scan_batch( + payloads(), excluded_secrets=secrets_ignore_patterns, validate=validate + ) + + for project_index, project in enumerate(projects): report = Check_Report_AWS(metadata=self.metadata(), resource=project) report.status = "PASS" report.status_extended = f"CodeBuild project {project.name} does not have sensitive environment plaintext credentials." secrets_found = [] + all_secrets = [] if project.environment_variables: - for env_var in project.environment_variables: - if ( - env_var.type == "PLAINTEXT" - and env_var.name not in sensitive_vars_excluded - ): - detect_secrets_output = detect_secrets_scan( - data=json.dumps({env_var.name: env_var.value}), - excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=codebuild_client.audit_config.get( - "detect_secrets_plugins", - ), - ) - if detect_secrets_output: - secrets_info = [ + for var_index, env_var in enumerate(project.environment_variables): + detect_secrets_output = batch_results.get( + (project_index, var_index) + ) + if detect_secrets_output: + all_secrets.extend(detect_secrets_output) + secrets_found.extend( + [ f"{secret['type']} in variable {env_var.name}" for secret in detect_secrets_output ] - secrets_found.extend(secrets_info) + ) if secrets_found: report.status = "FAIL" report.status_extended = f"CodeBuild project {project.name} has sensitive environment plaintext credentials in variables: {', '.join(secrets_found)}." 
+ annotate_verified_secrets(report, all_secrets) findings.append(report) diff --git a/prowler/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data.py b/prowler/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data.py index 3c3864c4799..9287a7a03c4 100644 --- a/prowler/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data.py +++ b/prowler/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data.py @@ -4,7 +4,10 @@ from prowler.config.config import encoding_format_utf_8 from prowler.lib.check.models import Check, Check_Report_AWS from prowler.lib.logger import logger -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.aws.services.ec2.ec2_client import ec2_client @@ -14,54 +17,73 @@ def execute(self): secrets_ignore_patterns = ec2_client.audit_config.get( "secrets_ignore_patterns", [] ) - for instance in ec2_client.instances: - if instance.state != "terminated": - report = Check_Report_AWS(metadata=self.metadata(), resource=instance) - if instance.user_data: - user_data = b64decode(instance.user_data) - try: - if user_data[0:2] == b"\x1f\x8b": # GZIP magic number - user_data = zlib.decompress( - user_data, zlib.MAX_WBITS | 32 - ).decode(encoding_format_utf_8) - else: - user_data = user_data.decode(encoding_format_utf_8) - except UnicodeDecodeError as error: - logger.warning( - f"{instance.region} -- Unable to decode user data in EC2 instance {instance.id}: {error}" - ) - continue - except Exception as error: - logger.error( - f"{instance.region} -- {error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}" - ) - continue - detect_secrets_output = detect_secrets_scan( - data=user_data, - excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=ec2_client.audit_config.get( - 
"detect_secrets_plugins" - ), - ) - if detect_secrets_output: - secrets_string = ", ".join( - [ - f"{secret['type']} on line {secret['line_number']}" - for secret in detect_secrets_output - ] - ) - report.status = "FAIL" - report.status_extended = f"Potential secret found in EC2 instance {instance.id} User Data -> {secrets_string}." + validate = ec2_client.audit_config.get("secrets_validate", False) + instances = list(ec2_client.instances) + + # Collect the decoded User Data of each non-terminated instance and scan + # it all in batched Kingfisher invocations instead of one subprocess each. + # Instances whose User Data cannot be decoded are recorded in `undecodable` + # and reported as MANUAL below instead of being silently omitted. + undecodable = set() + def payloads(): + for index, instance in enumerate(instances): + if instance.state == "terminated" or not instance.user_data: + continue + user_data = b64decode(instance.user_data) + try: + if user_data[0:2] == b"\x1f\x8b": # GZIP magic number + user_data = zlib.decompress( + user_data, zlib.MAX_WBITS | 32 + ).decode(encoding_format_utf_8) else: - report.status = "PASS" - report.status_extended = ( - f"No secrets found in EC2 instance {instance.id} User Data." 
- ) + user_data = user_data.decode(encoding_format_utf_8) + except UnicodeDecodeError as error: + logger.warning( + f"{instance.region} -- Unable to decode user data in EC2 instance {instance.id}: {error}" + ) + undecodable.add(index) + continue + except Exception as error: + logger.error( + f"{instance.region} -- {error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}" + ) + undecodable.add(index) + continue + yield index, user_data + + batch_results = detect_secrets_scan_batch( + payloads(), excluded_secrets=secrets_ignore_patterns, validate=validate + ) + + for index, instance in enumerate(instances): + if instance.state == "terminated": + continue + report = Check_Report_AWS(metadata=self.metadata(), resource=instance) + if index in undecodable: + report.status = "MANUAL" + report.status_extended = f"Could not decode User Data for EC2 instance {instance.id}; manual review is required to scan for secrets." + elif instance.user_data: + detect_secrets_output = batch_results.get(index) + if detect_secrets_output: + secrets_string = ", ".join( + [ + f"{secret['type']} on line {secret['line_number']}" + for secret in detect_secrets_output + ] + ) + report.status = "FAIL" + report.status_extended = f"Potential secret found in EC2 instance {instance.id} User Data -> {secrets_string}." + annotate_verified_secrets(report, detect_secrets_output) else: report.status = "PASS" - report.status_extended = f"No secrets found in EC2 instance {instance.id} since User Data is empty." + report.status_extended = ( + f"No secrets found in EC2 instance {instance.id} User Data." + ) + else: + report.status = "PASS" + report.status_extended = f"No secrets found in EC2 instance {instance.id} since User Data is empty." 
- findings.append(report) + findings.append(report) return findings diff --git a/prowler/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets.py b/prowler/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets.py index 823553bcdf3..029f9c34a1d 100644 --- a/prowler/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets.py +++ b/prowler/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets.py @@ -4,7 +4,10 @@ from prowler.config.config import encoding_format_utf_8 from prowler.lib.check.models import Check, Check_Report_AWS from prowler.lib.logger import logger -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.aws.services.ec2.ec2_client import ec2_client @@ -14,43 +17,61 @@ def execute(self): secrets_ignore_patterns = ec2_client.audit_config.get( "secrets_ignore_patterns", [] ) - for template in ec2_client.launch_templates: - report = Check_Report_AWS(metadata=self.metadata(), resource=template) + validate = ec2_client.audit_config.get("secrets_validate", False) + templates = list(ec2_client.launch_templates) - versions_with_secrets = [] + # Track versions whose User Data cannot be decoded so the template is + # surfaced (MANUAL) instead of silently claiming no secrets were found. + undecodable_versions = {} - for version in template.versions: - if not version.template_data.user_data: - continue - user_data = b64decode(version.template_data.user_data) + # Collect the decoded User Data of every (template, version) and scan it + # all in batched Kingfisher invocations instead of one subprocess per + # version. Versions whose User Data cannot be decoded are recorded above. 
+ def payloads(): + for template_index, template in enumerate(templates): + for version_index, version in enumerate(template.versions): + if not version.template_data.user_data: + continue + user_data = b64decode(version.template_data.user_data) + try: + if user_data[0:2] == b"\x1f\x8b": # GZIP magic number + user_data = zlib.decompress( + user_data, zlib.MAX_WBITS | 32 + ).decode(encoding_format_utf_8) + else: + user_data = user_data.decode(encoding_format_utf_8) + except UnicodeDecodeError as error: + logger.warning( + f"{template.region} -- Unable to decode User Data in EC2 Launch Template {template.name} version {version.version_number}: {error}" + ) + undecodable_versions.setdefault(template_index, []).append( + version.version_number + ) + continue + except Exception as error: + logger.error( + f"{template.region} -- {error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}" + ) + undecodable_versions.setdefault(template_index, []).append( + version.version_number + ) + continue + yield (template_index, version_index), user_data - try: - if user_data[0:2] == b"\x1f\x8b": # GZIP magic number - user_data = zlib.decompress( - user_data, zlib.MAX_WBITS | 32 - ).decode(encoding_format_utf_8) - else: - user_data = user_data.decode(encoding_format_utf_8) - except UnicodeDecodeError as error: - logger.warning( - f"{template.region} -- Unable to decode User Data in EC2 Launch Template {template.name} version {version.version_number}: {error}" - ) - continue - except Exception as error: - logger.error( - f"{template.region} -- {error.__class__.__name__}[{error.__traceback__.tb_lineno}]: {error}" - ) - continue + batch_results = detect_secrets_scan_batch( + payloads(), excluded_secrets=secrets_ignore_patterns, validate=validate + ) + + for template_index, template in enumerate(templates): + report = Check_Report_AWS(metadata=self.metadata(), resource=template) - version_secrets = detect_secrets_scan( - data=user_data, - 
excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=ec2_client.audit_config.get( - "detect_secrets_plugins" - ), - ) + versions_with_secrets = [] + all_secrets = [] + for version_index, version in enumerate(template.versions): + version_secrets = batch_results.get((template_index, version_index)) if version_secrets: + all_secrets.extend(version_secrets) secrets_string = ", ".join( [ f"{secret['type']} on line {secret['line_number']}" @@ -61,9 +82,14 @@ def execute(self): f"Version {version.version_number}: {secrets_string}" ) + undecodable = undecodable_versions.get(template_index, []) if len(versions_with_secrets) > 0: report.status = "FAIL" report.status_extended = f"Potential secret found in User Data for EC2 Launch Template {template.name} in template versions: {', '.join(versions_with_secrets)}." + annotate_verified_secrets(report, all_secrets) + elif undecodable: + report.status = "MANUAL" + report.status_extended = f"Could not decode User Data for EC2 Launch Template {template.name} versions: {', '.join(str(version_number) for version_number in undecodable)}; manual review is required to scan for secrets." else: report.status = "PASS" report.status_extended = f"No secrets found in User Data of any version for EC2 Launch Template {template.name}." 
diff --git a/prowler/providers/aws/services/ecs/ecs_task_definitions_no_environment_secrets/ecs_task_definitions_no_environment_secrets.py b/prowler/providers/aws/services/ecs/ecs_task_definitions_no_environment_secrets/ecs_task_definitions_no_environment_secrets.py index cd835d01498..dc8a0991f73 100644 --- a/prowler/providers/aws/services/ecs/ecs_task_definitions_no_environment_secrets/ecs_task_definitions_no_environment_secrets.py +++ b/prowler/providers/aws/services/ecs/ecs_task_definitions_no_environment_secrets/ecs_task_definitions_no_environment_secrets.py @@ -1,7 +1,10 @@ from json import dumps from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.aws.services.ecs.ecs_client import ecs_client @@ -11,33 +14,49 @@ def execute(self): secrets_ignore_patterns = ecs_client.audit_config.get( "secrets_ignore_patterns", [] ) - for task_definition in ecs_client.task_definitions.values(): + validate = ecs_client.audit_config.get("secrets_validate", False) + task_definitions = list(ecs_client.task_definitions.values()) + + # Scan every (task definition, container) environment in batched + # Kingfisher invocations instead of one subprocess per container. + # Payloads are yielded lazily so only a chunk is held/written at a time. 
+ def environment_payloads(): + for td_index, task_definition in enumerate(task_definitions): + for c_index, container in enumerate( + task_definition.container_definitions + ): + if container.environment: + dump_env_vars = { + env_var.name: env_var.value + for env_var in container.environment + } + yield (td_index, c_index), dumps(dump_env_vars, indent=2) + + batch_results = detect_secrets_scan_batch( + environment_payloads(), + excluded_secrets=secrets_ignore_patterns, + validate=validate, + ) + + for td_index, task_definition in enumerate(task_definitions): report = Check_Report_AWS( metadata=self.metadata(), resource=task_definition ) report.resource_id = f"{task_definition.name}:{task_definition.revision}" report.status = "PASS" extended_status_parts = [] + all_secrets = [] - for container in task_definition.container_definitions: + for c_index, container in enumerate(task_definition.container_definitions): container_secrets_found = [] if container.environment: - dump_env_vars = {} - original_env_vars = [] - for env_var in container.environment: - dump_env_vars.update({env_var.name: env_var.value}) - original_env_vars.append(env_var.name) - - env_data = dumps(dump_env_vars, indent=2) - detect_secrets_output = detect_secrets_scan( - data=env_data, - excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=ecs_client.audit_config.get( - "detect_secrets_plugins", - ), - ) + original_env_vars = [ + env_var.name for env_var in container.environment + ] + detect_secrets_output = batch_results.get((td_index, c_index)) if detect_secrets_output: + all_secrets.extend(detect_secrets_output) secrets_string = ", ".join( [ f"{secret['type']} on the environment variable {original_env_vars[secret['line_number'] - 2]}" @@ -56,6 +75,7 @@ def execute(self): + "; ".join(extended_status_parts) + "." 
) + annotate_verified_secrets(report, all_secrets) else: report.status_extended = f"No secrets found in variables of ECS task definition {task_definition.name} with revision {task_definition.revision}." findings.append(report) diff --git a/prowler/providers/aws/services/glue/glue_etl_jobs_no_secrets_in_arguments/glue_etl_jobs_no_secrets_in_arguments.py b/prowler/providers/aws/services/glue/glue_etl_jobs_no_secrets_in_arguments/glue_etl_jobs_no_secrets_in_arguments.py index 50c92f86193..1ef8fc72d02 100644 --- a/prowler/providers/aws/services/glue/glue_etl_jobs_no_secrets_in_arguments/glue_etl_jobs_no_secrets_in_arguments.py +++ b/prowler/providers/aws/services/glue/glue_etl_jobs_no_secrets_in_arguments/glue_etl_jobs_no_secrets_in_arguments.py @@ -1,52 +1,68 @@ -import json - -from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import detect_secrets_scan -from prowler.providers.aws.services.glue.glue_client import glue_client - - -class glue_etl_jobs_no_secrets_in_arguments(Check): - """Check if Glue ETL jobs have secrets in their default arguments. - - Scans the DefaultArguments of each Glue job for hardcoded credentials, - tokens, passwords, and other sensitive values that should be stored in - Secrets Manager or Parameter Store instead. - """ - - def execute(self): - findings = [] - secrets_ignore_patterns = glue_client.audit_config.get( - "secrets_ignore_patterns", [] - ) - for job in glue_client.jobs: - report = Check_Report_AWS(metadata=self.metadata(), resource=job) - report.status = "PASS" - report.status_extended = ( - f"No secrets found in Glue job {job.name} default arguments." 
- ) - - if job.arguments: - secrets_found = [] - for arg_name, arg_value in job.arguments.items(): - detect_secrets_output = detect_secrets_scan( - data=json.dumps({arg_name: arg_value}), - excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=glue_client.audit_config.get( - "detect_secrets_plugins", - ), - ) - if detect_secrets_output: - secrets_found.extend( - [ - f"{secret['type']} in argument {arg_name}" - for secret in detect_secrets_output - ] - ) - - if secrets_found: - report.status = "FAIL" - report.status_extended = f"Potential secrets found in Glue job {job.name} default arguments: {', '.join(secrets_found)}." - - findings.append(report) - - return findings +import json + +from prowler.lib.check.models import Check, Check_Report_AWS +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) +from prowler.providers.aws.services.glue.glue_client import glue_client + + +class glue_etl_jobs_no_secrets_in_arguments(Check): + """Check if Glue ETL jobs have secrets in their default arguments. + + Scans the DefaultArguments of each Glue job for hardcoded credentials, + tokens, passwords, and other sensitive values that should be stored in + Secrets Manager or Parameter Store instead. + """ + + def execute(self): + findings = [] + secrets_ignore_patterns = glue_client.audit_config.get( + "secrets_ignore_patterns", [] + ) + validate = glue_client.audit_config.get("secrets_validate", False) + jobs = list(glue_client.jobs) + + # Collect every default argument across all jobs and scan them in batched + # Kingfisher invocations instead of one subprocess per argument. Findings + # are keyed by (job index, argument name). 
+ def payloads(): + for job_index, job in enumerate(jobs): + if job.arguments: + for arg_name, arg_value in job.arguments.items(): + yield (job_index, arg_name), json.dumps({arg_name: arg_value}) + + batch_results = detect_secrets_scan_batch( + payloads(), excluded_secrets=secrets_ignore_patterns, validate=validate + ) + + for job_index, job in enumerate(jobs): + report = Check_Report_AWS(metadata=self.metadata(), resource=job) + report.status = "PASS" + report.status_extended = ( + f"No secrets found in Glue job {job.name} default arguments." + ) + + if job.arguments: + secrets_found = [] + all_secrets = [] + for arg_name in job.arguments: + detect_secrets_output = batch_results.get((job_index, arg_name)) + if detect_secrets_output: + all_secrets.extend(detect_secrets_output) + secrets_found.extend( + [ + f"{secret['type']} in argument {arg_name}" + for secret in detect_secrets_output + ] + ) + + if secrets_found: + report.status = "FAIL" + report.status_extended = f"Potential secrets found in Glue job {job.name} default arguments: {', '.join(secrets_found)}." 
+ annotate_verified_secrets(report, all_secrets) + + findings.append(report) + + return findings diff --git a/prowler/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets.py b/prowler/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets.py index 0ec8502babe..0fac7dd5880 100644 --- a/prowler/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets.py +++ b/prowler/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets.py @@ -1,7 +1,10 @@ import json from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.aws.services.ssm.ssm_client import ssm_client @@ -11,7 +14,21 @@ def execute(self): secrets_ignore_patterns = ssm_client.audit_config.get( "secrets_ignore_patterns", [] ) - for document in ssm_client.documents.values(): + validate = ssm_client.audit_config.get("secrets_validate", False) + documents = list(ssm_client.documents.values()) + + # Collect one payload per document (its content) and scan them all in + # batched Kingfisher invocations instead of one subprocess per document. 
+ def payloads(): + for index, document in enumerate(documents): + if document.content: + yield index, json.dumps(document.content, indent=2) + + batch_results = detect_secrets_scan_batch( + payloads(), excluded_secrets=secrets_ignore_patterns, validate=validate + ) + + for index, document in enumerate(documents): report = Check_Report_AWS(metadata=self.metadata(), resource=document) report.status = "PASS" report.status_extended = ( @@ -19,13 +36,7 @@ def execute(self): ) if document.content: - detect_secrets_output = detect_secrets_scan( - data=json.dumps(document.content, indent=2), - excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=ssm_client.audit_config.get( - "detect_secrets_plugins" - ), - ) + detect_secrets_output = batch_results.get(index) if detect_secrets_output: secrets_string = ", ".join( [ @@ -35,6 +46,7 @@ def execute(self): ) report.status = "FAIL" report.status_extended = f"Potential secret found in SSM Document {document.name} -> {secrets_string}." + annotate_verified_secrets(report, detect_secrets_output) findings.append(report) diff --git a/prowler/providers/aws/services/stepfunctions/stepfunctions_statemachine_no_secrets_in_definition/stepfunctions_statemachine_no_secrets_in_definition.py b/prowler/providers/aws/services/stepfunctions/stepfunctions_statemachine_no_secrets_in_definition/stepfunctions_statemachine_no_secrets_in_definition.py index db047100296..eab8c9ec569 100644 --- a/prowler/providers/aws/services/stepfunctions/stepfunctions_statemachine_no_secrets_in_definition/stepfunctions_statemachine_no_secrets_in_definition.py +++ b/prowler/providers/aws/services/stepfunctions/stepfunctions_statemachine_no_secrets_in_definition/stepfunctions_statemachine_no_secrets_in_definition.py @@ -1,5 +1,8 @@ from prowler.lib.check.models import Check, Check_Report_AWS -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from 
prowler.providers.aws.services.stepfunctions.stepfunctions_client import ( stepfunctions_client, ) @@ -13,20 +16,27 @@ def execute(self) -> list[Check_Report_AWS]: secrets_ignore_patterns = stepfunctions_client.audit_config.get( "secrets_ignore_patterns", [] ) - for state_machine in stepfunctions_client.state_machines.values(): + validate = stepfunctions_client.audit_config.get("secrets_validate", False) + state_machines = list(stepfunctions_client.state_machines.values()) + + # Collect one payload per state machine (its definition) and scan them + # all in batched Kingfisher invocations instead of one subprocess each. + def payloads(): + for index, state_machine in enumerate(state_machines): + if state_machine.definition: + yield index, state_machine.definition + + batch_results = detect_secrets_scan_batch( + payloads(), excluded_secrets=secrets_ignore_patterns, validate=validate + ) + + for index, state_machine in enumerate(state_machines): report = Check_Report_AWS(metadata=self.metadata(), resource=state_machine) report.status = "PASS" report.status_extended = f"No secrets found in Step Functions state machine {state_machine.name} definition." if state_machine.definition: - detect_secrets_output = detect_secrets_scan( - data=state_machine.definition, - excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=stepfunctions_client.audit_config.get( - "detect_secrets_plugins", - ), - ) - + detect_secrets_output = batch_results.get(index) if detect_secrets_output: secrets_string = ", ".join( [ @@ -40,6 +50,7 @@ def execute(self) -> list[Check_Report_AWS]: f"found in Step Functions state machine {state_machine.name} definition " f"-> {secrets_string}." 
) + annotate_verified_secrets(report, detect_secrets_output) findings.append(report) return findings diff --git a/prowler/providers/common/models.py b/prowler/providers/common/models.py index 120cc1a3747..84e71c48093 100644 --- a/prowler/providers/common/models.py +++ b/prowler/providers/common/models.py @@ -49,6 +49,16 @@ def __init__(self, arguments, bulk_checks_metadata): if updated_audit_config: provider._audit_config = updated_audit_config + # Secrets validation: --scan-secrets-validate opts into live validation + # of discovered secrets. Set the audit_config key directly so it applies + # even for providers whose default config does not declare it. + self.scan_secrets_validate = getattr(arguments, "scan_secrets_validate", False) + if self.scan_secrets_validate: + provider = Provider.get_global_provider() + audit_config = provider.audit_config or {} + audit_config["secrets_validate"] = True + provider._audit_config = audit_config + # Check output directory, if it is not created -> create it if self.output_directory and not self.fixer: if not isdir(self.output_directory): diff --git a/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.metadata.json b/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.metadata.json index 597d3ab4d4d..bd83049d826 100644 --- a/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.metadata.json +++ b/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.metadata.json @@ -36,5 +36,5 @@ "RelatedTo": [ "blockstorage_volume_metadata_sensitive_data" ], - "Notes": "This check uses the detect-secrets library to scan for credentials. 
May produce false positives on metadata keys containing secret-like keywords. Findings should be reviewed manually. The audit_config allows configuring secrets_ignore_patterns to exclude specific patterns and detect_secrets_plugins to customize detection." + "Notes": "This check uses Kingfisher to scan for credentials. May produce false positives on metadata keys containing secret-like keywords. Findings should be reviewed manually. The audit_config allows configuring secrets_ignore_patterns to exclude specific patterns." } diff --git a/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.py b/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.py index 95de6288713..d1069953d07 100644 --- a/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.py +++ b/prowler/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data.py @@ -2,7 +2,10 @@ from typing import List from prowler.lib.check.models import Check, CheckReportOpenStack -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.openstack.services.blockstorage.blockstorage_client import ( blockstorage_client, ) @@ -16,30 +19,28 @@ def execute(self) -> List[CheckReportOpenStack]: secrets_ignore_patterns = blockstorage_client.audit_config.get( "secrets_ignore_patterns", [] ) + validate = blockstorage_client.audit_config.get("secrets_validate", False) + snapshots = list(blockstorage_client.snapshots) + + # Collect one payload per snapshot (its metadata) and scan them all in + # batched Kingfisher invocations instead of one subprocess per snapshot. 
+ def payloads(): + for index, snapshot in enumerate(snapshots): + if snapshot.metadata: + yield index, json.dumps(dict(snapshot.metadata), indent=2) - for snapshot in blockstorage_client.snapshots: + batch_results = detect_secrets_scan_batch( + payloads(), excluded_secrets=secrets_ignore_patterns, validate=validate + ) + + for index, snapshot in enumerate(snapshots): report = CheckReportOpenStack(metadata=self.metadata(), resource=snapshot) report.status = "PASS" report.status_extended = f"Snapshot {snapshot.name} ({snapshot.id}) metadata does not contain sensitive data." if snapshot.metadata: - # Build metadata dict and parallel list of keys - dump_metadata = {} - original_metadata_keys = [] - for key, value in snapshot.metadata.items(): - dump_metadata[key] = value - original_metadata_keys.append(key) - - # Convert metadata dict to JSON string for detect-secrets scanning - metadata_json = json.dumps(dump_metadata, indent=2) - detect_secrets_output = detect_secrets_scan( - data=metadata_json, - excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=blockstorage_client.audit_config.get( - "detect_secrets_plugins" - ), - ) - + original_metadata_keys = list(snapshot.metadata.keys()) + detect_secrets_output = batch_results.get(index) if detect_secrets_output: # Map line numbers back to metadata keys using the parallel list # Line numbering: line 1 = "{", line 2 = first key-value, etc. @@ -54,6 +55,7 @@ def execute(self) -> List[CheckReportOpenStack]: ) report.status = "FAIL" report.status_extended = f"Snapshot {snapshot.name} ({snapshot.id}) metadata contains potential secrets -> {secrets_string}." + annotate_verified_secrets(report, detect_secrets_output) else: report.status_extended = f"Snapshot {snapshot.name} ({snapshot.id}) has no metadata (no sensitive data exposure risk)." 
diff --git a/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.metadata.json b/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.metadata.json index ec17ee02d1b..79874db214d 100644 --- a/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.metadata.json +++ b/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.metadata.json @@ -34,5 +34,5 @@ ], "DependsOn": [], "RelatedTo": [], - "Notes": "This check uses the detect-secrets library to scan for credentials. May produce false positives on metadata keys containing secret-like keywords. Findings should be reviewed manually. The audit_config allows configuring secrets_ignore_patterns to exclude specific patterns and detect_secrets_plugins to customize detection." + "Notes": "This check uses Kingfisher to scan for credentials. May produce false positives on metadata keys containing secret-like keywords. Findings should be reviewed manually. The audit_config allows configuring secrets_ignore_patterns to exclude specific patterns." 
} diff --git a/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.py b/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.py index 1bfa84c3df4..968d1c1ac58 100644 --- a/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.py +++ b/prowler/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data.py @@ -2,7 +2,10 @@ from typing import List from prowler.lib.check.models import Check, CheckReportOpenStack -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.openstack.services.blockstorage.blockstorage_client import ( blockstorage_client, ) @@ -16,30 +19,28 @@ def execute(self) -> List[CheckReportOpenStack]: secrets_ignore_patterns = blockstorage_client.audit_config.get( "secrets_ignore_patterns", [] ) + validate = blockstorage_client.audit_config.get("secrets_validate", False) + volumes = list(blockstorage_client.volumes) + + # Collect one payload per volume (its metadata) and scan them all in + # batched Kingfisher invocations instead of one subprocess per volume. + def payloads(): + for index, volume in enumerate(volumes): + if volume.metadata: + yield index, json.dumps(dict(volume.metadata), indent=2) - for volume in blockstorage_client.volumes: + batch_results = detect_secrets_scan_batch( + payloads(), excluded_secrets=secrets_ignore_patterns, validate=validate + ) + + for index, volume in enumerate(volumes): report = CheckReportOpenStack(metadata=self.metadata(), resource=volume) report.status = "PASS" report.status_extended = f"Volume {volume.name} ({volume.id}) metadata does not contain sensitive data." 
if volume.metadata: - # Build metadata dict and parallel list of keys - dump_metadata = {} - original_metadata_keys = [] - for key, value in volume.metadata.items(): - dump_metadata[key] = value - original_metadata_keys.append(key) - - # Convert metadata dict to JSON string for detect-secrets scanning - metadata_json = json.dumps(dump_metadata, indent=2) - detect_secrets_output = detect_secrets_scan( - data=metadata_json, - excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=blockstorage_client.audit_config.get( - "detect_secrets_plugins" - ), - ) - + original_metadata_keys = list(volume.metadata.keys()) + detect_secrets_output = batch_results.get(index) if detect_secrets_output: # Map line numbers back to metadata keys using the parallel list # Line numbering: line 1 = "{", line 2 = first key-value, etc. @@ -54,6 +55,7 @@ def execute(self) -> List[CheckReportOpenStack]: ) report.status = "FAIL" report.status_extended = f"Volume {volume.name} ({volume.id}) metadata contains potential secrets -> {secrets_string}." + annotate_verified_secrets(report, detect_secrets_output) else: report.status_extended = f"Volume {volume.name} ({volume.id}) has no metadata (no sensitive data exposure risk)." diff --git a/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.metadata.json b/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.metadata.json index 015a00986d4..c7f3e41f8e9 100644 --- a/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.metadata.json +++ b/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.metadata.json @@ -34,5 +34,5 @@ ], "DependsOn": [], "RelatedTo": [], - "Notes": "This check uses the detect-secrets library to scan for credentials. 
May produce false positives on metadata keys containing secret-like keywords. Findings should be reviewed manually. The audit_config allows configuring secrets_ignore_patterns to exclude specific patterns and detect_secrets_plugins to customize detection. Metadata is world-readable within instance via 169.254.169.254." + "Notes": "This check uses Kingfisher to scan for credentials. May produce false positives on metadata keys containing secret-like keywords. Findings should be reviewed manually. The audit_config allows configuring secrets_ignore_patterns to exclude specific patterns. Metadata is world-readable within instance via 169.254.169.254." } diff --git a/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.py b/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.py index 5df151c939f..bfca160b916 100644 --- a/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.py +++ b/prowler/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data.py @@ -2,7 +2,10 @@ from typing import List from prowler.lib.check.models import Check, CheckReportOpenStack -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.openstack.services.compute.compute_client import compute_client @@ -14,30 +17,28 @@ def execute(self) -> List[CheckReportOpenStack]: secrets_ignore_patterns = compute_client.audit_config.get( "secrets_ignore_patterns", [] ) + validate = compute_client.audit_config.get("secrets_validate", False) + instances = list(compute_client.instances) + + # Collect one payload per instance (its metadata) and scan them all in + # batched Kingfisher invocations instead of one 
subprocess per instance. + def payloads(): + for index, instance in enumerate(instances): + if instance.metadata: + yield index, json.dumps(dict(instance.metadata), indent=2) + + batch_results = detect_secrets_scan_batch( + payloads(), excluded_secrets=secrets_ignore_patterns, validate=validate + ) - for instance in compute_client.instances: + for index, instance in enumerate(instances): report = CheckReportOpenStack(metadata=self.metadata(), resource=instance) report.status = "PASS" report.status_extended = f"Instance {instance.name} ({instance.id}) metadata does not contain sensitive data." if instance.metadata: - # Build metadata dict and parallel list of keys (similar to AWS ECS pattern) - dump_metadata = {} - original_metadata_keys = [] - for key, value in instance.metadata.items(): - dump_metadata[key] = value - original_metadata_keys.append(key) - - # Convert metadata dict to JSON string for detect-secrets scanning - metadata_json = json.dumps(dump_metadata, indent=2) - detect_secrets_output = detect_secrets_scan( - data=metadata_json, - excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=compute_client.audit_config.get( - "detect_secrets_plugins" - ), - ) - + original_metadata_keys = list(instance.metadata.keys()) + detect_secrets_output = batch_results.get(index) if detect_secrets_output: # Map line numbers back to metadata keys using the parallel list # Line numbering: line 1 = "{", line 2 = first key-value, etc. @@ -50,6 +51,7 @@ def execute(self) -> List[CheckReportOpenStack]: ) report.status = "FAIL" report.status_extended = f"Instance {instance.name} ({instance.id}) metadata contains potential secrets -> {secrets_string}." + annotate_verified_secrets(report, detect_secrets_output) else: report.status_extended = f"Instance {instance.name} ({instance.id}) has no metadata (no sensitive data exposure risk)." 
diff --git a/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.metadata.json b/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.metadata.json index b3a39bd3f8a..37e7563c27c 100644 --- a/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.metadata.json +++ b/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.metadata.json @@ -35,5 +35,5 @@ ], "DependsOn": [], "RelatedTo": [], - "Notes": "This check uses the detect-secrets library to scan for credentials. May produce false positives on metadata keys containing secret-like keywords. Findings should be reviewed manually. The audit_config allows configuring secrets_ignore_patterns to exclude specific patterns and detect_secrets_plugins to customize detection." + "Notes": "This check uses Kingfisher to scan for credentials. May produce false positives on metadata keys containing secret-like keywords. Findings should be reviewed manually. The audit_config allows configuring secrets_ignore_patterns to exclude specific patterns." 
} diff --git a/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.py b/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.py index 94d281bf328..920a807945b 100644 --- a/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.py +++ b/prowler/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data.py @@ -2,7 +2,10 @@ from typing import List from prowler.lib.check.models import Check, CheckReportOpenStack -from prowler.lib.utils.utils import detect_secrets_scan +from prowler.lib.utils.utils import ( + annotate_verified_secrets, + detect_secrets_scan_batch, +) from prowler.providers.openstack.services.objectstorage.objectstorage_client import ( objectstorage_client, ) @@ -16,8 +19,21 @@ def execute(self) -> List[CheckReportOpenStack]: secrets_ignore_patterns = objectstorage_client.audit_config.get( "secrets_ignore_patterns", [] ) + validate = objectstorage_client.audit_config.get("secrets_validate", False) + containers = list(objectstorage_client.containers) + + # Collect one payload per container (its metadata) and scan them all in + # batched Kingfisher invocations instead of one subprocess per container. 
+ def payloads(): + for index, container in enumerate(containers): + if container.metadata: + yield index, json.dumps(dict(container.metadata), indent=2) + + batch_results = detect_secrets_scan_batch( + payloads(), excluded_secrets=secrets_ignore_patterns, validate=validate + ) - for container in objectstorage_client.containers: + for index, container in enumerate(containers): report = CheckReportOpenStack(metadata=self.metadata(), resource=container) report.status = "PASS" report.status_extended = ( @@ -25,23 +41,8 @@ def execute(self) -> List[CheckReportOpenStack]: ) if container.metadata: - # Build metadata dict and parallel list of keys - dump_metadata = {} - original_metadata_keys = [] - for key, value in container.metadata.items(): - dump_metadata[key] = value - original_metadata_keys.append(key) - - # Convert metadata dict to JSON string for detect-secrets scanning - metadata_json = json.dumps(dump_metadata, indent=2) - detect_secrets_output = detect_secrets_scan( - data=metadata_json, - excluded_secrets=secrets_ignore_patterns, - detect_secrets_plugins=objectstorage_client.audit_config.get( - "detect_secrets_plugins" - ), - ) - + original_metadata_keys = list(container.metadata.keys()) + detect_secrets_output = batch_results.get(index) if detect_secrets_output: # Map line numbers back to metadata keys using the parallel list # Line numbering: line 1 = "{", line 2 = first key-value, etc. @@ -56,6 +57,7 @@ def execute(self) -> List[CheckReportOpenStack]: ) report.status = "FAIL" report.status_extended = f"Container {container.name} metadata contains potential secrets -> {secrets_string}." + annotate_verified_secrets(report, detect_secrets_output) else: report.status_extended = f"Container {container.name} has no metadata (no sensitive data exposure risk)." 
diff --git a/pyproject.toml b/pyproject.toml index 37bb1b003a7..a645bd0d102 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,11 +72,11 @@ dependencies = [ "dash==3.1.1", "dash-bootstrap-components==2.0.3", "defusedxml==0.7.1", - "detect-secrets==1.5.0", "dulwich==1.2.5", "google-api-python-client==2.163.0", "google-auth-httplib2==0.2.0", "jsonschema==4.23.0", + "kingfisher-bin==1.104.0", "kubernetes==32.0.1", "linode-api4==5.45.0", "markdown==3.10.2", diff --git a/tests/config/schema/aws_schema_test.py b/tests/config/schema/aws_schema_test.py index ad08e84e3be..8731e08ba9c 100644 --- a/tests/config/schema/aws_schema_test.py +++ b/tests/config/schema/aws_schema_test.py @@ -129,47 +129,6 @@ def test_invalid_severity_levels_are_dropped(self, level): assert _validate({"ecr_repository_vulnerability_minimum_severity": level}) == {} -class Test_AWS_Detect_Secrets_Plugins: - def test_plugin_without_limit(self): - out = _validate({"detect_secrets_plugins": [{"name": "AWSKeyDetector"}]}) - assert out == {"detect_secrets_plugins": [{"name": "AWSKeyDetector"}]} - - def test_plugin_with_limit(self): - out = _validate( - { - "detect_secrets_plugins": [ - {"name": "Base64HighEntropyString", "limit": 6.0} - ] - } - ) - assert out == { - "detect_secrets_plugins": [ - {"name": "Base64HighEntropyString", "limit": 6.0} - ] - } - - def test_plugin_missing_name_drops_whole_field(self): - # ``name`` is required by the upstream library. - out = _validate({"detect_secrets_plugins": [{"limit": 6.0}]}) - assert out == {} - - def test_extra_plugin_kwargs_pass_through(self): - # Plugins can have arbitrary extra params (extra="allow" on the - # nested model). They must round-trip. 
- out = _validate( - { - "detect_secrets_plugins": [ - {"name": "Custom", "my_param": "abc", "other": 42} - ] - } - ) - assert out == { - "detect_secrets_plugins": [ - {"name": "Custom", "my_param": "abc", "other": 42} - ] - } - - class Test_AWS_Booleans: @pytest.mark.parametrize( "key", @@ -214,9 +173,5 @@ def test_full_default_config_round_trip(self): "threat_detection_enumeration_threshold": 0.3, "threat_detection_llm_jacking_threshold": 0.4, "ec2_high_risk_ports": [25, 110, 8088], - "detect_secrets_plugins": [ - {"name": "AWSKeyDetector"}, - {"name": "Base64HighEntropyString", "limit": 6.0}, - ], } assert _validate(raw) == raw diff --git a/tests/config/schema/bounds_test.py b/tests/config/schema/bounds_test.py index 0e5cad6056f..f29e73681ae 100644 --- a/tests/config/schema/bounds_test.py +++ b/tests/config/schema/bounds_test.py @@ -330,38 +330,6 @@ def test_invalid_rejected(self, value): assert _has_error_for(errors, "aws.trusted_ips") -class TestDetectSecretsEntropyBound: - """`detect_secrets_plugins[].limit` is Shannon entropy: 0..10.""" - - @pytest.mark.parametrize("value", [0.0, 3.5, 4.5, 8.0, 10.0]) - def test_valid(self, value): - assert ( - validate_scan_config( - { - "aws": { - "detect_secrets_plugins": [ - {"name": "Base64HighEntropyString", "limit": value} - ] - } - } - ) - == [] - ) - - @pytest.mark.parametrize("value", [-0.1, 10.01, 50]) - def test_invalid(self, value): - errors = validate_scan_config( - { - "aws": { - "detect_secrets_plugins": [ - {"name": "Base64HighEntropyString", "limit": value} - ] - } - } - ) - assert _has_error_for(errors, "aws.detect_secrets_plugins") - - class TestAdapterRobustness: """Top-level adapter behaviour the Prowler App backend depends on.""" diff --git a/tests/lib/utils/utils_test.py b/tests/lib/utils/utils_test.py index c8db5a62d0a..98262a875d5 100644 --- a/tests/lib/utils/utils_test.py +++ b/tests/lib/utils/utils_test.py @@ -7,7 +7,7 @@ from mock import patch from prowler.lib.utils.utils import ( - 
detect_secrets_scan, + detect_secrets_scan_batch, file_exists, get_file_permissions, hash_sha512, @@ -108,75 +108,47 @@ def test_validate_ip_address(self): assert not validate_ip_address("Not an IP") -class Test_detect_secrets_scan: - def test_detect_secrets_scan_data(self): - data = "password=password" - secrets_detected = detect_secrets_scan(data=data, excluded_secrets=[]) - assert type(secrets_detected) is list - assert len(secrets_detected) == 1 - assert "filename" in secrets_detected[0] - assert "hashed_secret" in secrets_detected[0] - assert "is_verified" in secrets_detected[0] - assert secrets_detected[0]["line_number"] == 1 - assert secrets_detected[0]["type"] == "Secret Keyword" - - def test_detect_secrets_scan_no_secrets_data(self): - data = "" - assert detect_secrets_scan(data=data) is None - - def test_detect_secrets_scan_file_with_secrets(self): - temp_data_file = tempfile.NamedTemporaryFile(delete=False) - temp_data_file.write(b"password=password") - temp_data_file.seek(0) - secrets_detected = detect_secrets_scan( - file=temp_data_file.name, excluded_secrets=[] +class Test_detect_secrets_scan_batch: + def test_batch_returns_findings_per_key(self): + results = detect_secrets_scan_batch( + { + "a": 'password = "Tr0ub4dor3xKq9vLmZ"', + "b": "just a normal config = value", + } ) - assert type(secrets_detected) is list - assert len(secrets_detected) == 1 - assert "filename" in secrets_detected[0] - assert "hashed_secret" in secrets_detected[0] - assert "is_verified" in secrets_detected[0] - assert secrets_detected[0]["line_number"] == 1 - assert secrets_detected[0]["type"] == "Secret Keyword" - os.remove(temp_data_file.name) - - def test_detect_secrets_scan_file_no_secrets(self): - temp_data_file = tempfile.NamedTemporaryFile(delete=False) - temp_data_file.write(b"no secrets") - temp_data_file.seek(0) - assert detect_secrets_scan(file=temp_data_file.name) is None - os.remove(temp_data_file.name) - - def test_detect_secrets_using_regex(self): - data = 
"MYSQL_ALLOW_EMPTY_PASSWORD=password" - secrets_detected = detect_secrets_scan( - data=data, excluded_secrets=[".*password"] + assert "a" in results + assert results["a"][0]["type"] == "Generic Password" + # keys without findings are omitted + assert "b" not in results + + def test_batch_no_dedup_reports_identical_secret_in_each_key(self): + # The same secret in two payloads must be reported for both (matches + # scanning each payload individually). + secret = "token = eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U" + results = detect_secrets_scan_batch({"a": secret, "b": secret}) + assert "a" in results + assert "b" in results + + def test_batch_excluded_secrets_filters(self): + results = detect_secrets_scan_batch( + {"a": 'DB_ALLOW_EMPTY_PASSWORD = "Tr0ub4dor3xKq9vLmZ"'}, + excluded_secrets=[".*ALLOW_EMPTY_PASSWORD.*"], ) - assert secrets_detected is None + assert results == {} - def test_detect_secrets_using_regex_file(self): - temp_data_file = tempfile.NamedTemporaryFile(delete=False) - temp_data_file.write(b"MYSQL_ALLOW_EMPTY_PASSWORD=password") - temp_data_file.seek(0) - secrets_detected = detect_secrets_scan( - file=temp_data_file.name, excluded_secrets=[".*password"] - ) - assert secrets_detected is None - os.remove(temp_data_file.name) + def test_batch_chunking_maps_all_keys(self): + payloads = {f"k{i}": f'password = "S3cr3tV4lu3xy{i}z"' for i in range(5)} + results = detect_secrets_scan_batch(payloads, chunk_size=2) + assert sorted(results.keys()) == ["k0", "k1", "k2", "k3", "k4"] + + def test_batch_empty_payloads(self): + assert detect_secrets_scan_batch({}) == {} - def test_detect_secrets_secrets_using_regex(self): - data = "MYSQL_ALLOW_EMPTY_PASSWORD=password, MYSQL_PASSWORD=password" - # Update the regex to exclude only the exact key "MYSQL_ALLOW_EMPTY_PASSWORD" - secrets_detected = detect_secrets_scan( - data=data, excluded_secrets=["^MYSQL_ALLOW_EMPTY_PASSWORD$"] + def 
test_batch_accepts_iterable_of_pairs(self): + results = detect_secrets_scan_batch( + iter([("x", 'password = "Tr0ub4dor3xKq9vLmZ"')]) ) - assert type(secrets_detected) is list - assert len(secrets_detected) == 1 - assert "filename" in secrets_detected[0] - assert "hashed_secret" in secrets_detected[0] - assert "is_verified" in secrets_detected[0] - assert secrets_detected[0]["line_number"] == 1 - assert secrets_detected[0]["type"] == "Secret Keyword" + assert "x" in results class Test_hash_sha512: diff --git a/tests/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration_test.py b/tests/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration_test.py index 10057618349..ec45b89a493 100644 --- a/tests/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration_test.py +++ b/tests/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/autoscaling_find_secrets_ec2_launch_configuration_test.py @@ -104,7 +104,7 @@ def test_one_autoscaling_with_secrets(self): InstanceType="t1.micro", KeyName="the_keys", SecurityGroups=["default", "default2"], - UserData="DB_PASSWORD=foobar123", + UserData='DB_PASSWORD="Tr0ub4dor3xKq9vLmZ"', ) launch_configuration_arn = autoscaling_client.describe_launch_configurations( LaunchConfigurationNames=[launch_configuration_name] @@ -341,7 +341,9 @@ def test_one_autoscaling_file_with_unicode_error(self): check = autoscaling_find_secrets_ec2_launch_configuration() result = check.execute() - assert len(result) == 0 + assert len(result) == 1 + assert result[0].status == "MANUAL" + assert "Could not decode User Data" in result[0].status_extended @mock_aws def test_one_autoscaling_file_invalid_gzip_error(self): @@ -381,4 +383,6 @@ def test_one_autoscaling_file_invalid_gzip_error(self): check = 
autoscaling_find_secrets_ec2_launch_configuration() result = check.execute() - assert len(result) == 0 + assert len(result) == 1 + assert result[0].status == "MANUAL" + assert "Could not decode User Data" in result[0].status_extended diff --git a/tests/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/fixtures/fixture b/tests/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/fixtures/fixture index 2fb5138932d..c591954ab4b 100644 --- a/tests/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/fixtures/fixture +++ b/tests/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/fixtures/fixture @@ -1,4 +1,4 @@ -DB_PASSWORD=foobar123 +DB_PASSWORD="Tr0ub4dor3xKq9vLmZ" DB_USER=foo -API_KEY=12345abcd -SERVICE_PASSWORD=bbaabb45 +API_KEY=s3rv1c3Acc0untS3cr3tV4lu3x9 +SERVICE_PASSWORD="Xy9zPq2wKmRtVbN4" diff --git a/tests/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/fixtures/fixture.gz b/tests/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/fixtures/fixture.gz index 6120fcfbc43..15e68af70e5 100644 Binary files a/tests/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/fixtures/fixture.gz and b/tests/providers/aws/services/autoscaling/autoscaling_find_secrets_ec2_launch_configuration/fixtures/fixture.gz differ diff --git a/tests/providers/aws/services/awslambda/awslambda_function_no_secrets_in_code/awslambda_function_no_secrets_in_code_test.py b/tests/providers/aws/services/awslambda/awslambda_function_no_secrets_in_code/awslambda_function_no_secrets_in_code_test.py index 5f97082c1b5..79c4227bbac 100644 --- a/tests/providers/aws/services/awslambda/awslambda_function_no_secrets_in_code/awslambda_function_no_secrets_in_code_test.py +++ 
b/tests/providers/aws/services/awslambda/awslambda_function_no_secrets_in_code/awslambda_function_no_secrets_in_code_test.py @@ -19,7 +19,7 @@ LAMBDA_FUNCTION_ARN = f"arn:aws:lambda:{AWS_REGION_US_EAST_1}:{AWS_ACCOUNT_NUMBER}:function/{LAMBDA_FUNCTION_NAME}" LAMBDA_FUNCTION_CODE_WITH_SECRETS = """ def lambda_handler(event, context): - db_password = "test-password" + db_password = "Tr0ub4dor3xKq9vLmZ" print("custom log event") return event """ @@ -126,7 +126,7 @@ def test_function_code_with_secrets(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secret found in Lambda function {LAMBDA_FUNCTION_NAME} code -> lambda_function.py: Secret Keyword on line 3." + == f"Potential secret found in Lambda function {LAMBDA_FUNCTION_NAME} code -> lambda_function.py: Generic Password on line 3." ) assert result[0].resource_tags == [] diff --git a/tests/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables_test.py b/tests/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables_test.py index 6ae517dfe2a..df8c3a29da7 100644 --- a/tests/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables_test.py +++ b/tests/providers/aws/services/awslambda/awslambda_function_no_secrets_in_variables/awslambda_function_no_secrets_in_variables_test.py @@ -97,7 +97,7 @@ def test_function_secrets_in_keyword(self): arn=function_arn, region=AWS_REGION_US_EAST_1, runtime=function_runtime, - environment={"db_password": "test-password"}, + environment={"db_password": "Tr0ub4dor3xKq9vLmZ"}, ) } @@ -126,7 +126,7 @@ def test_function_secrets_in_keyword(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secret found in Lambda function {function_name} variables -> Secret Keyword in variable db_password." 
+ == f"Potential secret found in Lambda function {function_name} variables -> Generic Password in variable db_password." ) assert result[0].resource_tags == [] @@ -145,7 +145,9 @@ def test_function_secrets_in_keyword_and_variable(self): arn=function_arn, region=AWS_REGION_US_EAST_1, runtime=function_runtime, - environment={"db_password": "srv://admin:pass@db"}, + environment={ + "db_password": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U" + }, ) } @@ -172,9 +174,18 @@ def test_function_secrets_in_keyword_and_variable(self): assert result[0].resource_id == function_name assert result[0].resource_arn == function_arn assert result[0].status == "FAIL" + # Kingfisher reports both the generic keyword rule and the JWT rule + # for the same value; their order is not guaranteed, so assert on + # presence rather than a fixed concatenation order. + assert result[0].status_extended.startswith( + f"Potential secret found in Lambda function {function_name} variables -> " + ) assert ( - result[0].status_extended - == f"Potential secret found in Lambda function {function_name} variables -> Secret Keyword in variable db_password, Basic Auth Credentials in variable db_password." + "Generic Password in variable db_password" in result[0].status_extended + ) + assert ( + "JSON Web Token (base64url-encoded) in variable db_password" + in result[0].status_extended ) assert result[0].resource_tags == [] @@ -191,7 +202,12 @@ def test_function_secrets_in_variables_telegram_token(self): arn=function_arn, region=AWS_REGION_US_EAST_1, runtime=function_runtime, - environment={"TELEGRAM_BOT_TOKEN": "telegram-token"}, + environment={ + # The Telegram bot-token rule is no longer enabled in + # Kingfisher's built-in ruleset, so a detectable JWT + # is used to keep this token-in-variable case meaningful. 
+ "TELEGRAM_BOT_TOKEN": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U" + }, ) } @@ -217,13 +233,78 @@ def test_function_secrets_in_variables_telegram_token(self): assert result[0].region == AWS_REGION_US_EAST_1 assert result[0].resource_id == function_name assert result[0].resource_arn == function_arn - assert result[0].status == "PASS" + assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"No secrets found in Lambda function {function_name} variables." + == f"Potential secret found in Lambda function {function_name} variables -> JSON Web Token (base64url-encoded) in variable TELEGRAM_BOT_TOKEN." ) assert result[0].resource_tags == [] + def test_function_with_verified_secret(self): + from prowler.lib.check.models import Severity + + lambda_client = mock.MagicMock + function_name = "test-lambda" + function_runtime = "nodejs4.3" + function_arn = f"arn:aws:lambda:{AWS_REGION_US_EAST_1}:{AWS_ACCOUNT_NUMBER}:function/{function_name}" + lambda_client.audit_config = { + "secrets_ignore_patterns": [], + "secrets_validate": True, + } + + lambda_client.functions = { + "function_name": Function( + name=function_name, + security_groups=[], + arn=function_arn, + region=AWS_REGION_US_EAST_1, + runtime=function_runtime, + environment={"db_password": "test-value"}, + ) + } + + with ( + mock.patch( + "prowler.providers.common.provider.Provider.get_global_provider", + return_value=set_mocked_aws_provider( + audit_config={"secrets_validate": True} + ), + ), + mock.patch( + "prowler.providers.aws.services.awslambda.awslambda_function_no_secrets_in_variables.awslambda_function_no_secrets_in_variables.awslambda_client", + new=lambda_client, + ), + mock.patch( + "prowler.providers.aws.services.awslambda.awslambda_function_no_secrets_in_variables.awslambda_function_no_secrets_in_variables.detect_secrets_scan_batch", + return_value={ + 0: [ + { + "type": "JSON Web Token (base64url-encoded)", + 
"line_number": 2, + "filename": "data", + "hashed_secret": "x", + "is_verified": True, + } + ] + }, + ) as mock_scan, + ): + # Test Check + from prowler.providers.aws.services.awslambda.awslambda_function_no_secrets_in_variables.awslambda_function_no_secrets_in_variables import ( + awslambda_function_no_secrets_in_variables, + ) + + check = awslambda_function_no_secrets_in_variables() + result = check.execute() + + # The check must forward secrets_validate from the config to the scan. + assert mock_scan.call_args.kwargs.get("validate") is True + assert len(result) == 1 + assert result[0].status == "FAIL" + assert result[0].check_metadata.Severity == Severity.critical + assert "confirmed to be live" in result[0].status_extended + assert result[0].resource_id == function_name + def test_function_no_secrets_in_variables(self): lambda_client = mock.MagicMock function_name = "test-lambda" diff --git a/tests/providers/aws/services/cloudformation/cloudformation_stack_outputs_find_secrets/cloudformation_stack_outputs_find_secrets_test.py b/tests/providers/aws/services/cloudformation/cloudformation_stack_outputs_find_secrets/cloudformation_stack_outputs_find_secrets_test.py index 6b8d0d9b15e..ad9d2777882 100644 --- a/tests/providers/aws/services/cloudformation/cloudformation_stack_outputs_find_secrets/cloudformation_stack_outputs_find_secrets_test.py +++ b/tests/providers/aws/services/cloudformation/cloudformation_stack_outputs_find_secrets/cloudformation_stack_outputs_find_secrets_test.py @@ -38,7 +38,10 @@ def test_stack_secret_in_outputs(self): Stack( arn="arn:aws:cloudformation:eu-west-1:123456789012:stack/Test-Stack/796c8d26-b390-41d7-a23c-0702c4e78b60", name=stack_name, - outputs=["DB_PASSWORD:foobar123", "ENV:DEV"], + outputs=[ + "DB_KEY:eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U", + "ENV:DEV", + ], region=AWS_REGION, ) ] @@ -66,7 +69,7 @@ def test_stack_secret_in_outputs(self): assert result[0].status 
== "FAIL" assert ( result[0].status_extended - == f"Potential secret found in CloudFormation Stack {stack_name} Outputs -> Secret Keyword in Output 1." + == f"Potential secret found in CloudFormation Stack {stack_name} Outputs -> JSON Web Token (base64url-encoded) in Output 1." ) assert result[0].resource_id == "Test-Stack" assert ( diff --git a/tests/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs_test.py b/tests/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs_test.py index e9eb00175b5..69db1621dc0 100644 --- a/tests/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs_test.py +++ b/tests/providers/aws/services/cloudwatch/cloudwatch_log_group_no_secrets_in_logs/cloudwatch_log_group_no_secrets_in_logs_test.py @@ -132,7 +132,7 @@ def test_cloudwatch_log_group_with_secrets(self): logEvents=[ { "timestamp": timestamp, - "message": "password = password123", + "message": 'password = "Tr0ub4dor3xKq9vLmZ"', } ], ) @@ -174,7 +174,7 @@ def test_cloudwatch_log_group_with_secrets(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secrets found in log group test in log stream test stream at {dttimestamp} - Secret Keyword on line 1." + == f"Potential secrets found in log group test in log stream test stream at {dttimestamp} - Generic Password on line 1." 
) assert result[0].resource_id == "test" assert ( diff --git a/tests/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables_test.py b/tests/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables_test.py index c08d1c8bb34..a5df692d593 100644 --- a/tests/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables_test.py +++ b/tests/providers/aws/services/codebuild/codebuild_project_no_secrets_in_variables/codebuild_project_no_secrets_in_variables_test.py @@ -1,6 +1,10 @@ from unittest import mock -from tests.providers.aws.utils import AWS_ACCOUNT_NUMBER, AWS_REGION_US_EAST_1 +from tests.providers.aws.utils import ( + AWS_ACCOUNT_NUMBER, + AWS_REGION_US_EAST_1, + set_mocked_aws_provider, +) class Test_codebuild_project_no_secrets_in_variables: @@ -202,7 +206,11 @@ def test_project_with_sensitive_plaintext_credentials(self): environment_variables=[ { "name": "AWS_ACCESS_KEY_ID", - "value": "AKIAIOSFODNN7EXAMPLE", + # Realistic fake secret that Kingfisher detects. The classic + # "AKIAIOSFODNN7EXAMPLE" placeholder is suppressed by + # Kingfisher and its AWS Access Key rule is not enabled, so a + # detectable provider secret is used instead. + "value": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U", "type": "PLAINTEXT", } ], @@ -231,15 +239,100 @@ def test_project_with_sensitive_plaintext_credentials(self): assert len(result) == 1 assert result[0].status == "FAIL" + # The JWT paired with a "KEY" variable name yields both a + # JWT and a Generic API Key finding; order is non-deterministic. 
+ assert result[0].status_extended.startswith( + "CodeBuild project SensitiveProject has sensitive environment plaintext credentials in variables:" + ) assert ( - result[0].status_extended - == "CodeBuild project SensitiveProject has sensitive environment plaintext credentials in variables: AWS Access Key in variable AWS_ACCESS_KEY_ID." + "JSON Web Token (base64url-encoded) in variable AWS_ACCESS_KEY_ID" + in result[0].status_extended + ) + assert ( + "Generic API Key in variable AWS_ACCESS_KEY_ID" + in result[0].status_extended ) assert result[0].region == AWS_REGION_US_EAST_1 assert result[0].resource_id == "SensitiveProject" assert result[0].resource_arn == project_arn assert result[0].resource_tags == [] + def test_project_with_verified_secret(self): + from prowler.lib.check.models import Severity + + codebuild_client = mock.MagicMock() + + from prowler.providers.aws.services.codebuild.codebuild_service import Project + + project_arn = f"arn:aws:codebuild:{AWS_REGION_US_EAST_1}:{AWS_ACCOUNT_NUMBER}:project/SensitiveProject" + codebuild_client.projects = { + project_arn: Project( + name="SensitiveProject", + arn=project_arn, + region=AWS_REGION_US_EAST_1, + last_invoked_time=None, + buildspec=None, + environment_variables=[ + { + "name": "EXAMPLE_VAR", + "value": "ExampleValue", + "type": "PLAINTEXT", + } + ], + tags=[], + ) + } + + codebuild_client.audit_config = { + "excluded_sensitive_environment_variables": [], + "secrets_validate": True, + } + + with ( + mock.patch( + "prowler.providers.common.provider.Provider.get_global_provider", + return_value=set_mocked_aws_provider( + audit_config={"secrets_validate": True} + ), + ), + mock.patch( + "prowler.providers.aws.services.codebuild.codebuild_service.Codebuild", + codebuild_client, + ), + mock.patch( + "prowler.providers.aws.services.codebuild.codebuild_project_no_secrets_in_variables.codebuild_project_no_secrets_in_variables.codebuild_client", + codebuild_client, + ), + mock.patch( + 
"prowler.providers.aws.services.codebuild.codebuild_project_no_secrets_in_variables.codebuild_project_no_secrets_in_variables.detect_secrets_scan_batch", + return_value={ + (0, 0): [ + { + "type": "JSON Web Token (base64url-encoded)", + "line_number": 1, + "filename": "data", + "hashed_secret": "x", + "is_verified": True, + } + ] + }, + ) as mock_scan, + ): + from prowler.providers.aws.services.codebuild.codebuild_project_no_secrets_in_variables.codebuild_project_no_secrets_in_variables import ( + codebuild_project_no_secrets_in_variables, + ) + + check = codebuild_project_no_secrets_in_variables() + result = check.execute() + + # The check must forward secrets_validate from the config to the scan. + assert mock_scan.call_args.kwargs.get("validate") is True + assert len(result) == 1 + assert result[0].status == "FAIL" + assert result[0].check_metadata.Severity == Severity.critical + assert "confirmed to be live" in result[0].status_extended + assert result[0].resource_id == "SensitiveProject" + def test_project_with_sensitive_plaintext_credentials_exluded(self): codebuild_client = mock.MagicMock @@ -373,12 +466,12 @@ def test_project_with_sensitive_plaintext_credentials_excluded_and_failed(self): environment_variables=[ { "name": "AWS_DUMB_ACCESS_KEY", - "value": "AKIAIOSFODNN7EXAMPLE", + "value": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U", "type": "PLAINTEXT", }, { "name": "AWS_ACCESS_KEY_ID", - "value": "AKIAIOSFODNN7EXAMPLE", + "value": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U", "type": "PLAINTEXT", }, ], @@ -409,10 +502,21 @@ def test_project_with_sensitive_plaintext_credentials_excluded_and_failed(self): assert len(result) == 1 assert result[0].status == "FAIL" + # AWS_DUMB_ACCESS_KEY is excluded, so only AWS_ACCESS_KEY_ID is + # scanned; its JWT + "KEY" name yields both a JWT and a + # Generic API Key finding with 
non-deterministic order. + assert result[0].status_extended.startswith( + "CodeBuild project SensitiveProject has sensitive environment plaintext credentials in variables:" + ) assert ( - result[0].status_extended - == "CodeBuild project SensitiveProject has sensitive environment plaintext credentials in variables: AWS Access Key in variable AWS_ACCESS_KEY_ID." + "JSON Web Token (base64url-encoded) in variable AWS_ACCESS_KEY_ID" + in result[0].status_extended + ) + assert ( + "Generic API Key in variable AWS_ACCESS_KEY_ID" + in result[0].status_extended ) + assert "AWS_DUMB_ACCESS_KEY" not in result[0].status_extended assert result[0].region == AWS_REGION_US_EAST_1 assert result[0].resource_id == "SensitiveProject" assert result[0].resource_arn == project_arn @@ -434,12 +538,12 @@ def test_project_with_multiple_sensitive_credentials(self): environment_variables=[ { "name": "AWS_DUMB_ACCESS_KEY", - "value": "AKIAIOSFODNN7EXAMPLE", + "value": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U", "type": "PLAINTEXT", }, { "name": "AWS_ACCESS_KEY_ID", - "value": "AKIAIOSFODNN7EXAMPLE", + "value": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U", "type": "PLAINTEXT", }, ], @@ -468,10 +572,21 @@ def test_project_with_multiple_sensitive_credentials(self): assert len(result) == 1 assert result[0].status == "FAIL" - assert ( - result[0].status_extended - == "CodeBuild project SensitiveProject has sensitive environment plaintext credentials in variables: AWS Access Key in variable AWS_DUMB_ACCESS_KEY, AWS Access Key in variable AWS_ACCESS_KEY_ID." + # Both variables hold a JWT and have "KEY" in their name, so + # each yields a JWT and a Generic API Key finding; order is + # non-deterministic. 
+ assert result[0].status_extended.startswith( + "CodeBuild project SensitiveProject has sensitive environment plaintext credentials in variables:" ) + for var_name in ("AWS_DUMB_ACCESS_KEY", "AWS_ACCESS_KEY_ID"): + assert ( + f"JSON Web Token (base64url-encoded) in variable {var_name}" + in result[0].status_extended + ) + assert ( + f"Generic API Key in variable {var_name}" + in result[0].status_extended + ) assert result[0].region == AWS_REGION_US_EAST_1 assert result[0].resource_id == "SensitiveProject" assert result[0].resource_arn == project_arn diff --git a/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data_test.py b/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data_test.py index 0e8d303fe05..0745302af9e 100644 --- a/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data_test.py +++ b/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/ec2_instance_secrets_user_data_test.py @@ -100,7 +100,7 @@ def test_one_ec2_with_secrets(self): ImageId=EXAMPLE_AMI_ID, MinCount=1, MaxCount=1, - UserData="DB_PASSWORD=foobar123", + UserData='DB_PASSWORD="Tr0ub4dor3xKq9vLmZ"', )[0] from prowler.providers.aws.services.ec2.ec2_service import EC2 @@ -130,7 +130,7 @@ def test_one_ec2_with_secrets(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secret found in EC2 instance {instance.id} User Data -> Secret Keyword on line 1." + == f"Potential secret found in EC2 instance {instance.id} User Data -> Generic Password on line 1." ) assert result[0].resource_id == instance.id assert ( @@ -233,7 +233,7 @@ def test_one_ec2_file_with_secrets(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secret found in EC2 instance {instance.id} User Data -> Secret Keyword on line 1, Hex High Entropy String on line 3, Secret Keyword on line 3, Secret Keyword on line 4." 
+ == f"Potential secret found in EC2 instance {instance.id} User Data -> Generic Password on line 1, JSON Web Token (base64url-encoded) on line 3, Generic Password on line 4." ) assert result[0].resource_id == instance.id assert ( @@ -327,7 +327,7 @@ def test_one_ec2_file_with_secrets_gzip(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secret found in EC2 instance {instance.id} User Data -> Secret Keyword on line 1, Hex High Entropy String on line 3, Secret Keyword on line 3, Secret Keyword on line 4." + == f"Potential secret found in EC2 instance {instance.id} User Data -> Generic Password on line 1, JSON Web Token (base64url-encoded) on line 3, Generic Password on line 4." ) assert result[0].resource_id == instance.id assert ( @@ -337,6 +337,64 @@ def test_one_ec2_file_with_secrets_gzip(self): assert result[0].resource_tags is None assert result[0].region == AWS_REGION_US_EAST_1 + @mock_aws + def test_one_ec2_with_verified_secret(self): + from prowler.lib.check.models import Severity + + ec2 = resource("ec2", region_name=AWS_REGION_US_EAST_1) + instance = ec2.create_instances( + ImageId=EXAMPLE_AMI_ID, + MinCount=1, + MaxCount=1, + UserData='STRIPE_KEY="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U"', + )[0] + + from prowler.providers.aws.services.ec2.ec2_service import EC2 + + aws_provider = set_mocked_aws_provider( + [AWS_REGION_EU_WEST_1, AWS_REGION_US_EAST_1], + audit_config={"secrets_validate": True}, + ) + + with ( + mock.patch( + "prowler.providers.common.provider.Provider.get_global_provider", + return_value=aws_provider, + ), + mock.patch( + "prowler.providers.aws.services.ec2.ec2_instance_secrets_user_data.ec2_instance_secrets_user_data.ec2_client", + new=EC2(aws_provider), + ), + mock.patch( + "prowler.providers.aws.services.ec2.ec2_instance_secrets_user_data.ec2_instance_secrets_user_data.detect_secrets_scan_batch", + return_value={ + 0: [ + { 
+ "type": "JSON Web Token (base64url-encoded)", + "line_number": 1, + "filename": "data", + "hashed_secret": "x", + "is_verified": True, + } + ] + }, + ) as mock_scan, + ): + from prowler.providers.aws.services.ec2.ec2_instance_secrets_user_data.ec2_instance_secrets_user_data import ( + ec2_instance_secrets_user_data, + ) + + check = ec2_instance_secrets_user_data() + result = check.execute() + + # The check must forward secrets_validate from the config to the scan. + assert mock_scan.call_args.kwargs.get("validate") is True + assert len(result) == 1 + assert result[0].status == "FAIL" + assert result[0].check_metadata.Severity == Severity.critical + assert "confirmed to be live" in result[0].status_extended + assert result[0].resource_id == instance.id + @mock_aws def test_one_secrets_with_unicode_error(self): invalid_utf8_bytes = b"\xc0\xaf" @@ -368,4 +426,6 @@ def test_one_secrets_with_unicode_error(self): check = ec2_instance_secrets_user_data() result = check.execute() - assert len(result) == 0 + assert len(result) == 1 + assert result[0].status == "MANUAL" + assert "Could not decode User Data" in result[0].status_extended diff --git a/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/fixtures/fixture b/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/fixtures/fixture index 2fb5138932d..528ff40f8ff 100644 --- a/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/fixtures/fixture +++ b/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/fixtures/fixture @@ -1,4 +1,4 @@ -DB_PASSWORD=foobar123 +DB_PASSWORD="Tr0ub4dor3xKq9vLmZ" DB_USER=foo -API_KEY=12345abcd -SERVICE_PASSWORD=bbaabb45 +STRIPE_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U +SERVICE_PASSWORD="Xy9zPq2wKmRtVbN4" diff --git a/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/fixtures/fixture.gz 
b/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/fixtures/fixture.gz index 6120fcfbc43..859b38cb62b 100644 Binary files a/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/fixtures/fixture.gz and b/tests/providers/aws/services/ec2/ec2_instance_secrets_user_data/fixtures/fixture.gz differ diff --git a/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets_test.py b/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets_test.py index 0430ca5caec..dab6debb6b6 100644 --- a/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets_test.py +++ b/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/ec2_launch_template_no_secrets_test.py @@ -29,7 +29,9 @@ def mock_make_api_call(self, operation_name, kwarg): "VersionNumber": 123, "LaunchTemplateData": { "UserData": b64encode( - "DB_PASSWORD=foobar123".encode(encoding_format_utf_8) + 'DB_PASSWORD="Tr0ub4dor3xKq9vLmZ"'.encode( + encoding_format_utf_8 + ) ).decode(encoding_format_utf_8), "NetworkInterfaces": [{"AssociatePublicIpAddress": True}], }, @@ -164,7 +166,7 @@ def test_one_launch_template_with_secrets(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == "Potential secret found in User Data for EC2 Launch Template tester1 in template versions: Version 123: Secret Keyword on line 1." + == "Potential secret found in User Data for EC2 Launch Template tester1 in template versions: Version 123: Generic Password on line 1." 
) assert result[0].resource_id == "lt-1234567890" assert result[0].region == AWS_REGION_US_EAST_1 @@ -212,7 +214,7 @@ def test_one_launch_template_with_secrets_in_multiple_versions(self): ) ec2_client.launch_templates = [launch_template] - ec2_client.audit_config = {"detect_secrets_plugins": None} + ec2_client.audit_config = {} with ( mock.patch( @@ -236,7 +238,7 @@ def test_one_launch_template_with_secrets_in_multiple_versions(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secret found in User Data for EC2 Launch Template {launch_template_name} in template versions: Version 1: Secret Keyword on line 1, Hex High Entropy String on line 3, Secret Keyword on line 3, Secret Keyword on line 4, Version 2: Secret Keyword on line 1, Hex High Entropy String on line 3, Secret Keyword on line 3, Secret Keyword on line 4." + == f"Potential secret found in User Data for EC2 Launch Template {launch_template_name} in template versions: Version 1: Generic Password on line 1, JSON Web Token (base64url-encoded) on line 3, Generic Password on line 4, Version 2: Generic Password on line 1, JSON Web Token (base64url-encoded) on line 3, Generic Password on line 4." ) assert result[0].resource_id == launch_template_id assert result[0].region == AWS_REGION_US_EAST_1 @@ -290,7 +292,7 @@ def test_one_launch_template_with_secrets_in_single_version(self): ) ec2_client.launch_templates = [launch_template] - ec2_client.audit_config = {"detect_secrets_plugins": None} + ec2_client.audit_config = {} with ( mock.patch( @@ -314,7 +316,7 @@ def test_one_launch_template_with_secrets_in_single_version(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secret found in User Data for EC2 Launch Template {launch_template_name} in template versions: Version 1: Secret Keyword on line 1, Hex High Entropy String on line 3, Secret Keyword on line 3, Secret Keyword on line 4." 
+ == f"Potential secret found in User Data for EC2 Launch Template {launch_template_name} in template versions: Version 1: Generic Password on line 1, JSON Web Token (base64url-encoded) on line 3, Generic Password on line 4." ) assert result[0].resource_id == launch_template_id assert result[0].region == AWS_REGION_US_EAST_1 @@ -358,7 +360,7 @@ def test_one_launch_template_with_secrets_gzip(self): ) ec2_client.launch_templates = [launch_template] - ec2_client.audit_config = {"detect_secrets_plugins": None} + ec2_client.audit_config = {} with ( mock.patch( @@ -382,7 +384,7 @@ def test_one_launch_template_with_secrets_gzip(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secret found in User Data for EC2 Launch Template {launch_template_name} in template versions: Version 1: Secret Keyword on line 1, Hex High Entropy String on line 3, Secret Keyword on line 3, Secret Keyword on line 4." + == f"Potential secret found in User Data for EC2 Launch Template {launch_template_name} in template versions: Version 1: Generic Password on line 1, JSON Web Token (base64url-encoded) on line 3, Generic Password on line 4." 
) assert result[0].resource_id == launch_template_id assert result[0].region == AWS_REGION_US_EAST_1 @@ -391,6 +393,81 @@ def test_one_launch_template_with_secrets_gzip(self): ) assert result[0].resource_tags == [] + def test_one_launch_template_with_verified_secret(self): + from prowler.lib.check.models import Severity + + ec2_client = mock.MagicMock() + launch_template_name = "tester" + launch_template_id = "lt-1234567890" + launch_template_arn = ( + f"arn:aws:ec2:us-east-1:123456789012:launch-template/{launch_template_id}" + ) + + launch_template_data = TemplateData( + user_data=b64encode( + "This is some user_data".encode(encoding_format_utf_8) + ).decode(encoding_format_utf_8), + associate_public_ip_address=True, + ) + + launch_template_versions = [ + LaunchTemplateVersion( + version_number=1, + template_data=launch_template_data, + ), + ] + + launch_template = LaunchTemplate( + name=launch_template_name, + id=launch_template_id, + arn=launch_template_arn, + region=AWS_REGION_US_EAST_1, + versions=launch_template_versions, + ) + + ec2_client.launch_templates = [launch_template] + ec2_client.audit_config = {"secrets_validate": True} + + with ( + mock.patch( + "prowler.providers.common.provider.Provider.get_global_provider", + return_value=ec2_client, + ), + mock.patch( + "prowler.providers.aws.services.ec2.ec2_launch_template_no_secrets.ec2_launch_template_no_secrets.ec2_client", + new=ec2_client, + ), + mock.patch( + "prowler.providers.aws.services.ec2.ec2_launch_template_no_secrets.ec2_launch_template_no_secrets.detect_secrets_scan_batch", + return_value={ + (0, 0): [ + { + "type": "JSON Web Token (base64url-encoded)", + "line_number": 1, + "filename": "data", + "hashed_secret": "x", + "is_verified": True, + } + ] + }, + ) as mock_scan, + ): + # Test Check + from prowler.providers.aws.services.ec2.ec2_launch_template_no_secrets.ec2_launch_template_no_secrets import ( + ec2_launch_template_no_secrets, + ) + + check = ec2_launch_template_no_secrets() + result = 
check.execute() + + # The check must forward secrets_validate from the config to the scan. + assert mock_scan.call_args.kwargs.get("validate") is True + assert len(result) == 1 + assert result[0].status == "FAIL" + assert result[0].check_metadata.Severity == Severity.critical + assert "confirmed to be live" in result[0].status_extended + assert result[0].resource_id == launch_template_id + @mock_aws def test_one_launch_template_without_user_data(self): launch_template_name = "tester" @@ -506,7 +583,7 @@ def test_two_launch_templates_one_template_with_secrets(self): launch_template_secrets, launch_template_no_secrets, ] - ec2_client.audit_config = {"detect_secrets_plugins": None} + ec2_client.audit_config = {} with ( mock.patch( @@ -530,7 +607,7 @@ def test_two_launch_templates_one_template_with_secrets(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secret found in User Data for EC2 Launch Template {launch_template_name1} in template versions: Version 1: Secret Keyword on line 1, Hex High Entropy String on line 3, Secret Keyword on line 3, Secret Keyword on line 4." + == f"Potential secret found in User Data for EC2 Launch Template {launch_template_name1} in template versions: Version 1: Generic Password on line 1, JSON Web Token (base64url-encoded) on line 3, Generic Password on line 4." ) assert result[0].resource_id == launch_template_id1 assert result[0].region == AWS_REGION_US_EAST_1 @@ -593,10 +670,10 @@ def test_one_launch_template_with_unicode_error(self): result = check.execute() assert len(result) == 1 - assert result[0].status == "PASS" + assert result[0].status == "MANUAL" assert ( - result[0].status_extended - == f"No secrets found in User Data of any version for EC2 Launch Template {launch_template_name}." 
+ f"Could not decode User Data for EC2 Launch Template {launch_template_name}" + in result[0].status_extended ) assert result[0].resource_id == launch_template_id assert result[0].region == AWS_REGION_US_EAST_1 diff --git a/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/fixtures/fixture b/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/fixtures/fixture index 2fb5138932d..528ff40f8ff 100644 --- a/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/fixtures/fixture +++ b/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/fixtures/fixture @@ -1,4 +1,4 @@ -DB_PASSWORD=foobar123 +DB_PASSWORD="Tr0ub4dor3xKq9vLmZ" DB_USER=foo -API_KEY=12345abcd -SERVICE_PASSWORD=bbaabb45 +STRIPE_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U +SERVICE_PASSWORD="Xy9zPq2wKmRtVbN4" diff --git a/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/fixtures/fixture.gz b/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/fixtures/fixture.gz index 6120fcfbc43..859b38cb62b 100644 Binary files a/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/fixtures/fixture.gz and b/tests/providers/aws/services/ec2/ec2_launch_template_no_secrets/fixtures/fixture.gz differ diff --git a/tests/providers/aws/services/ecs/ecs_task_definitions_no_environment_secrets/ecs_task_definitions_no_environment_secrets_test.py b/tests/providers/aws/services/ecs/ecs_task_definitions_no_environment_secrets/ecs_task_definitions_no_environment_secrets_test.py index 24c27ccf0b1..753c00b0ef4 100644 --- a/tests/providers/aws/services/ecs/ecs_task_definitions_no_environment_secrets/ecs_task_definitions_no_environment_secrets_test.py +++ b/tests/providers/aws/services/ecs/ecs_task_definitions_no_environment_secrets/ecs_task_definitions_no_environment_secrets_test.py @@ -11,9 +11,17 @@ ENV_VAR_NAME_NO_SECRETS = "host" ENV_VAR_VALUE_NO_SECRETS = "localhost:1234" 
ENV_VAR_NAME_WITH_KEYWORD = "DB_PASSWORD" -ENV_VAR_VALUE_WITH_SECRETS = "srv://admin:pass@db" +# Realistic fake secrets that Kingfisher actually detects (placeholders such as +# the previous "srv://admin:pass@db" basic-auth URL are no longer flagged). +# A JWT fires on any line of the dumped JSON (even when followed by a +# trailing comma); a keyword-named variable additionally fires the generic +# keyword rule when it is the last entry in the dump. +ENV_VAR_VALUE_WITH_SECRETS = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U" ENV_VAR_NAME_WITH_KEYWORD2 = "DATABASE_PASSWORD" -ENV_VAR_VALUE_WITH_SECRETS2 = "srv://admin:password@database" +ENV_VAR_VALUE_WITH_SECRETS2 = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI5ODc2NTQzMjEwIiwibmFtZSI6IkphbmUifQ.s5LqY8mC2pX1vN0bQwReTyUiOpAsDfGhJkLzXcVbNm0" +# Generic password/secret assignment value (detected only on the last entry of +# the JSON dump, where there is no trailing comma after the value). +ENV_VAR_VALUE_GENERIC_SECRET = "Tr0ub4dor3xKq9vLmZ" class Test_ecs_task_definitions_no_environment_secrets: @@ -143,7 +151,7 @@ def test_container_env_var_with_secret(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secrets found in ECS task definition {TASK_NAME} with revision {TASK_REVISION}: Secrets in container test-container -> Basic Auth Credentials on the environment variable host." + == f"Potential secrets found in ECS task definition {TASK_NAME} with revision {TASK_REVISION}: Secrets in container test-container -> JSON Web Token (base64url-encoded) on the environment variable host." 
) assert result[0].resource_id == f"{TASK_NAME}:{TASK_REVISION}" assert result[0].resource_arn == task_arn @@ -167,7 +175,7 @@ def test_container_env_var_with_keyword(self): "environment": [ { "name": ENV_VAR_NAME_WITH_KEYWORD, - "value": ENV_VAR_VALUE_NO_SECRETS, + "value": ENV_VAR_VALUE_GENERIC_SECRET, } ], } @@ -198,7 +206,7 @@ def test_container_env_var_with_keyword(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secrets found in ECS task definition {TASK_NAME} with revision {TASK_REVISION}: Secrets in container test-container -> Secret Keyword on the environment variable DB_PASSWORD." + == f"Potential secrets found in ECS task definition {TASK_NAME} with revision {TASK_REVISION}: Secrets in container test-container -> Generic Password on the environment variable DB_PASSWORD." ) assert result[0].resource_id == f"{TASK_NAME}:{TASK_REVISION}" assert result[0].resource_arn == task_arn @@ -251,9 +259,20 @@ def test_container_env_var_with_keyword_and_secret(self): result = check.execute() assert len(result) == 1 assert result[0].status == "FAIL" + # The keyword-named variable holding a real secret triggers both the + # generic keyword rule and the JWT rule on the same line. + # Kingfisher emits same-line findings in a non-deterministic order, so + # assert both are present without pinning their order. + assert result[0].status_extended.startswith( + f"Potential secrets found in ECS task definition {TASK_NAME} with revision {TASK_REVISION}: Secrets in container test-container -> " + ) assert ( - result[0].status_extended - == f"Potential secrets found in ECS task definition {TASK_NAME} with revision {TASK_REVISION}: Secrets in container test-container -> Secret Keyword on the environment variable DB_PASSWORD, Basic Auth Credentials on the environment variable DB_PASSWORD." 
+ "JSON Web Token (base64url-encoded) on the environment variable DB_PASSWORD" + in result[0].status_extended + ) + assert ( + "Generic Password on the environment variable DB_PASSWORD" + in result[0].status_extended ) assert result[0].resource_id == f"{TASK_NAME}:{TASK_REVISION}" assert result[0].resource_arn == task_arn @@ -310,9 +329,23 @@ def test_container_multiple_env_vars_with_keyword_and_secret(self): result = check.execute() assert len(result) == 1 assert result[0].status == "FAIL" + # DB_PASSWORD holds a JWT under a keyword name, so it fires + # both the JWT rule and the generic keyword rule on the + # same line (non-deterministic order); host holds a second JWT. + assert result[0].status_extended.startswith( + f"Potential secrets found in ECS task definition {TASK_NAME} with revision {TASK_REVISION}: Secrets in container test-container -> " + ) assert ( - result[0].status_extended - == f"Potential secrets found in ECS task definition {TASK_NAME} with revision {TASK_REVISION}: Secrets in container test-container -> Secret Keyword on the environment variable DB_PASSWORD, Basic Auth Credentials on the environment variable DB_PASSWORD, Basic Auth Credentials on the environment variable host." 
+ "JSON Web Token (base64url-encoded) on the environment variable DB_PASSWORD" + in result[0].status_extended + ) + assert ( + "Generic Password on the environment variable DB_PASSWORD" + in result[0].status_extended + ) + assert ( + "JSON Web Token (base64url-encoded) on the environment variable host" + in result[0].status_extended ) assert result[0].resource_id == f"{TASK_NAME}:{TASK_REVISION}" assert result[0].resource_arn == task_arn @@ -340,7 +373,7 @@ def test_container_all_env_vars_with_keyword_and_secret(self): }, { "name": ENV_VAR_NAME_WITH_KEYWORD2, - "value": ENV_VAR_VALUE_WITH_SECRETS2, + "value": ENV_VAR_VALUE_GENERIC_SECRET, }, ], } @@ -369,9 +402,24 @@ def test_container_all_env_vars_with_keyword_and_secret(self): result = check.execute() assert len(result) == 1 assert result[0].status == "FAIL" + # DB_PASSWORD holds a JWT under a keyword name, so it fires + # both the JWT and the generic keyword rule on the same line + # (non-deterministic order); DATABASE_PASSWORD fires the generic + # keyword rule on its own line. + assert result[0].status_extended.startswith( + f"Potential secrets found in ECS task definition {TASK_NAME} with revision {TASK_REVISION}: Secrets in container test-container -> " + ) assert ( - result[0].status_extended - == f"Potential secrets found in ECS task definition {TASK_NAME} with revision {TASK_REVISION}: Secrets in container test-container -> Secret Keyword on the environment variable DB_PASSWORD, Basic Auth Credentials on the environment variable DB_PASSWORD, Basic Auth Credentials on the environment variable DATABASE_PASSWORD, Secret Keyword on the environment variable DATABASE_PASSWORD." 
+ "JSON Web Token (base64url-encoded) on the environment variable DB_PASSWORD" + in result[0].status_extended + ) + assert ( + "Generic Password on the environment variable DB_PASSWORD" + in result[0].status_extended + ) + assert ( + "Generic Password on the environment variable DATABASE_PASSWORD" + in result[0].status_extended ) assert result[0].resource_id == f"{TASK_NAME}:{TASK_REVISION}" assert result[0].resource_arn == task_arn diff --git a/tests/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets_test.py b/tests/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets_test.py index 24f0a1fdd1a..9fc6de4762a 100644 --- a/tests/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets_test.py +++ b/tests/providers/aws/services/ssm/ssm_document_secrets/ssm_document_secrets_test.py @@ -27,13 +27,13 @@ def test_document_with_secrets(self): document_name = "test-document" document_arn = f"arn:aws:ssm:{AWS_REGION_US_EAST_1}:{AWS_ACCOUNT_NUMBER}:document/{document_name}" ssm_client.audited_account = AWS_ACCOUNT_NUMBER - ssm_client.audit_config = {"detect_secrets_plugins": None} + ssm_client.audit_config = {} ssm_client.documents = { document_name: Document( arn=document_arn, name=document_name, region=AWS_REGION_US_EAST_1, - content={"db_password": "test-password"}, + content={"db_password": "Tr0ub4dor3xKq9vLmZ"}, account_owners=[], ) } @@ -56,7 +56,7 @@ def test_document_with_secrets(self): assert result[0].status == "FAIL" assert ( result[0].status_extended - == f"Potential secret found in SSM Document {document_name} -> Secret Keyword on line 2." + == f"Potential secret found in SSM Document {document_name} -> Generic Password on line 2." 
) def test_document_no_secrets(self): diff --git a/tests/providers/aws/services/stepfunctions/stepfunctions_statemachine_no_secrets_in_definition/stepfunctions_statemachine_no_secrets_in_definition_test.py b/tests/providers/aws/services/stepfunctions/stepfunctions_statemachine_no_secrets_in_definition/stepfunctions_statemachine_no_secrets_in_definition_test.py index 628525e5421..1b1fa1bfcac 100644 --- a/tests/providers/aws/services/stepfunctions/stepfunctions_statemachine_no_secrets_in_definition/stepfunctions_statemachine_no_secrets_in_definition_test.py +++ b/tests/providers/aws/services/stepfunctions/stepfunctions_statemachine_no_secrets_in_definition/stepfunctions_statemachine_no_secrets_in_definition_test.py @@ -147,7 +147,7 @@ def test_statemachine_with_secrets_in_definition(self): arn=statemachine_arn, name="TestStateMachine", status=StateMachineStatus.ACTIVE, - definition='{"Comment": "Example with secret", "StartAt": "MyTask", "States": {"MyTask": {"Type": "Task", "Parameters": {"api_key": "AKIAIOSFODNN7EXAMPLE"}, "End": true}}}', + definition='{"Comment": "Example with secret", "StartAt": "MyTask", "States": {"MyTask": {"Type": "Task", "Parameters": {"api_key": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U"}, "End": true}}}', region=AWS_REGION_US_EAST_1, type=StateMachineType.STANDARD, creation_date=datetime.now(), diff --git a/tests/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data_test.py b/tests/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data_test.py index 6be9ac48f5f..85b97da0a23 100644 --- a/tests/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data_test.py +++ 
b/tests/providers/openstack/services/blockstorage/blockstorage_snapshot_metadata_sensitive_data/blockstorage_snapshot_metadata_sensitive_data_test.py @@ -2,6 +2,7 @@ from unittest import mock +from prowler.lib.check.models import Severity from prowler.providers.openstack.services.blockstorage.blockstorage_service import ( SnapshotResource, ) @@ -141,7 +142,7 @@ def test_snapshot_password_in_metadata(self): status="available", size=50, volume_id="vol-1", - metadata={"db_password": "supersecret123"}, + metadata={"db_password": "Tr0ub4dor3xKq9vLmZ"}, project_id=OPENSTACK_PROJECT_ID, region=OPENSTACK_REGION, ) @@ -179,7 +180,9 @@ def test_snapshot_api_key_in_metadata(self): status="available", size=50, volume_id="vol-1", - metadata={"api_key": "sk-1234567890"}, + metadata={ + "api_key": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U" + }, project_id=OPENSTACK_PROJECT_ID, region=OPENSTACK_REGION, ) @@ -223,7 +226,9 @@ def test_snapshot_private_key_in_metadata(self): status="available", size=50, volume_id="vol-1", - metadata={"ssh_key": "-----BEGIN RSA PRIVATE KEY-----"}, + metadata={ + "ssh_key": "-----BEGIN PRIVATE 
KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQCUzlT9QGi8ZSr5\nk+LTRz/1TaiCCs6o1icW4cur0Q0hdBnbRJXUdjlQsgzmBvCBNkGHI8hb/RUPssvc\nDLU5kOQ3Wp2KgtbphhZ2PfpuJrzwHL1ejcJkRxegm/aTdmpoQKcxGeehAfHbmlLA\nxdfn6wPDfGji973yiRH56JRukJAaqF50HC2a/AVNC5HtZoVlbQ+WvVbYVUnPxNkv\nPpc53PjrBgWiTtdMONEqJ3jDiaqfUBt+TZYF0CFc9HgjnUniRX28OukDyLu+idOz\nFKyZxMXtqexkAvQLDW1PATpZgVQ7hJoCD8UVTXAtcgzPq5fA6AR2URiECHI6ZyL0\nUmixKfMNAgMBAAECggEAJRzp5wjdpmEgDQOkjpfGXJ6sAJUD8mmI8cTKeJWIzhdo\nDH8oVEdRJ65kl6lS6hMXWEZlJgYyrsnj3MPBnjQkKycbRCy6P59s8jwmfbsFI+iz\nFUZLXZm6i5jicGhYBRzc5hrlIYu73863RXOClAnSFDsu6K6rzfYASQFIJeRBwJfs\njqXinuun/h2zGjpiY+TtNsa8c+nC7f3sGsTzNJugDvBPWQzsnAMzXJqiyharre4V\no157XIOvdC0joIp8j/Ib1ZtMfz1K1LcgBgw0szSieIw0Rq8yQ0Ek7GtLh43jG+ap\nvcSEesTD1p4mjPXoWkPG8KYd4iwGedZaePfheVcKKQKBgQDNE03SWv18AH0d4fpB\nlFAtRybCfSvMORzBrt2oilz8wDmK+Zga5o+phCnM8v3eJy1v8BvIQ9RvwQA2uVgZ\nr701wNMpVrTsMujk83oVRhimZLk6Hyw07wmMgEHX7+izkm2Lk4Lk7Zol3VRfnWG6\nmIcUk7xB1yAs3mudsfx0VO0QyQKBgQC5wfdqCLj2hZk4sMZu8Bth+BHKChGItmDk\nAW7aNt+gaPyoryOJoi2OUO8ud8EyuqXiuslSk2pPtjvLhCppkoq6V8kmPAUzaxFk\n4nDEAxT9Un8IJ0j2ebv+koQKsBWjssbVSjrZgIcYIDK1QblgbCp2FSE3ima+V8ip\nOdNjiatWJQKBgEX8lox5nRSanhh6rIuA8DPjmmi5ix7xRs0avm7seXuQppK1R6G2\nmcTCY/mb2+Pa/vi6uuCHtZJGDaqfal+pyCr2GZp8CtapMS4hocJs37C5ozUguld+\nVIXsp4voRkQybsw5lWxHYloVxNu0vEuQDlmJabAWmNZ3OcbhnUSeTyFxAoGAFtkZ\n0owCHChwoT11Gt4jsBgwL/avE27DWigm92Y6eWOQeDsalupAyjmAQenu9Itqrgml\ni6egMu/KSQ0Xnmas86CqmC5XwWxQ9mS31BRA96u2/ky+t7pfej+RSDNCZiEuPbvk\noy4g78G+GvdbktWbH20X6dn3K0Bm6RG4w4yCa5UCgYBs0zAVs0DZmM8SUZJA/HuQ\nN6a1vKKns7xKw5N3SmX1KbDhx5LSZXfbUo2+QktE7iRf9G2f1o0q8kz9l/4AGXi1\nKJNUHupWoaQzGNrzAb27TUtFA0ocMG8KnqxjANWox5oPJS9OU5tw5H5dxeI/Senc\nkYW6eCnRzPcmBqex6Vuw4w==\n-----END PRIVATE KEY-----\n" + }, project_id=OPENSTACK_PROJECT_ID, region=OPENSTACK_REGION, ) @@ -277,7 +282,7 @@ def test_multiple_snapshots_mixed(self): status="available", size=50, volume_id="vol-2", - metadata={"admin_password": "secret123"}, + metadata={"admin_password": "Tr0ub4dor3xKq9vLmZ"}, 
project_id=OPENSTACK_PROJECT_ID, region=OPENSTACK_REGION, ), @@ -318,7 +323,7 @@ def test_snapshot_metadata_key_correct_identification(self): metadata={ "environment": "production", "application": "web-app", - "db_password": "supersecret123", + "db_password": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U", "region": "us-east", }, project_id=OPENSTACK_PROJECT_ID, @@ -348,3 +353,57 @@ def test_snapshot_metadata_key_correct_identification(self): # Verify the secret is correctly attributed to 'db_password' key assert "in metadata key 'db_password'" in result[0].status_extended assert result[0].resource_id == "snap-6" + + def test_snapshot_verified_secret_escalates_to_critical(self): + """Test that a confirmed live secret escalates the finding to CRITICAL (FAIL).""" + blockstorage_client = mock.MagicMock() + blockstorage_client.audit_config = {"secrets_validate": True} + blockstorage_client.snapshots = [ + SnapshotResource( + id="snap-verified", + name="Verified Secret", + status="available", + size=50, + volume_id="vol-1", + metadata={"api_key": "placeholder"}, + project_id=OPENSTACK_PROJECT_ID, + region=OPENSTACK_REGION, + ) + ] + + with ( + mock.patch( + "prowler.providers.common.provider.Provider.get_global_provider", + return_value=set_mocked_openstack_provider(), + ), + mock.patch( + "prowler.providers.openstack.services.blockstorage.blockstorage_snapshot_metadata_sensitive_data.blockstorage_snapshot_metadata_sensitive_data.blockstorage_client", + new=blockstorage_client, + ), + mock.patch( + "prowler.providers.openstack.services.blockstorage.blockstorage_snapshot_metadata_sensitive_data.blockstorage_snapshot_metadata_sensitive_data.detect_secrets_scan_batch", + return_value={ + 0: [ + { + "type": "JSON Web Token (base64url-encoded)", + "line_number": 2, + "filename": "data", + "hashed_secret": "x", + "is_verified": True, + } + ] + }, + ) as mock_scan, + ): + from 
prowler.providers.openstack.services.blockstorage.blockstorage_snapshot_metadata_sensitive_data.blockstorage_snapshot_metadata_sensitive_data import ( + blockstorage_snapshot_metadata_sensitive_data, + ) + + check = blockstorage_snapshot_metadata_sensitive_data() + result = check.execute() + + assert len(result) == 1 + assert result[0].status == "FAIL" + assert result[0].check_metadata.Severity == Severity.critical + assert "confirmed to be live" in result[0].status_extended + assert mock_scan.call_args.kwargs.get("validate") is True diff --git a/tests/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data_test.py b/tests/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data_test.py index e12babb23c4..80927e2f9dd 100644 --- a/tests/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data_test.py +++ b/tests/providers/openstack/services/blockstorage/blockstorage_volume_metadata_sensitive_data/blockstorage_volume_metadata_sensitive_data_test.py @@ -2,6 +2,7 @@ from unittest import mock +from prowler.lib.check.models import Severity from prowler.providers.openstack.services.blockstorage.blockstorage_service import ( VolumeResource, ) @@ -159,7 +160,7 @@ def test_volume_password_in_metadata(self): is_bootable=False, is_multiattach=False, attachments=[], - metadata={"db_password": "supersecret123"}, + metadata={"db_password": "Tr0ub4dor3xKq9vLmZ"}, availability_zone="nova", snapshot_id="", source_volume_id="", @@ -204,7 +205,9 @@ def test_volume_api_key_in_metadata(self): is_bootable=False, is_multiattach=False, attachments=[], - metadata={"api_key": "sk-1234567890"}, + metadata={ + "api_key": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U" + }, availability_zone="nova", snapshot_id="", 
source_volume_id="", @@ -255,7 +258,9 @@ def test_volume_private_key_in_metadata(self): is_bootable=False, is_multiattach=False, attachments=[], - metadata={"ssh_key": "-----BEGIN RSA PRIVATE KEY-----"}, + metadata={ + "ssh_key": "-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQCUzlT9QGi8ZSr5\nk+LTRz/1TaiCCs6o1icW4cur0Q0hdBnbRJXUdjlQsgzmBvCBNkGHI8hb/RUPssvc\nDLU5kOQ3Wp2KgtbphhZ2PfpuJrzwHL1ejcJkRxegm/aTdmpoQKcxGeehAfHbmlLA\nxdfn6wPDfGji973yiRH56JRukJAaqF50HC2a/AVNC5HtZoVlbQ+WvVbYVUnPxNkv\nPpc53PjrBgWiTtdMONEqJ3jDiaqfUBt+TZYF0CFc9HgjnUniRX28OukDyLu+idOz\nFKyZxMXtqexkAvQLDW1PATpZgVQ7hJoCD8UVTXAtcgzPq5fA6AR2URiECHI6ZyL0\nUmixKfMNAgMBAAECggEAJRzp5wjdpmEgDQOkjpfGXJ6sAJUD8mmI8cTKeJWIzhdo\nDH8oVEdRJ65kl6lS6hMXWEZlJgYyrsnj3MPBnjQkKycbRCy6P59s8jwmfbsFI+iz\nFUZLXZm6i5jicGhYBRzc5hrlIYu73863RXOClAnSFDsu6K6rzfYASQFIJeRBwJfs\njqXinuun/h2zGjpiY+TtNsa8c+nC7f3sGsTzNJugDvBPWQzsnAMzXJqiyharre4V\no157XIOvdC0joIp8j/Ib1ZtMfz1K1LcgBgw0szSieIw0Rq8yQ0Ek7GtLh43jG+ap\nvcSEesTD1p4mjPXoWkPG8KYd4iwGedZaePfheVcKKQKBgQDNE03SWv18AH0d4fpB\nlFAtRybCfSvMORzBrt2oilz8wDmK+Zga5o+phCnM8v3eJy1v8BvIQ9RvwQA2uVgZ\nr701wNMpVrTsMujk83oVRhimZLk6Hyw07wmMgEHX7+izkm2Lk4Lk7Zol3VRfnWG6\nmIcUk7xB1yAs3mudsfx0VO0QyQKBgQC5wfdqCLj2hZk4sMZu8Bth+BHKChGItmDk\nAW7aNt+gaPyoryOJoi2OUO8ud8EyuqXiuslSk2pPtjvLhCppkoq6V8kmPAUzaxFk\n4nDEAxT9Un8IJ0j2ebv+koQKsBWjssbVSjrZgIcYIDK1QblgbCp2FSE3ima+V8ip\nOdNjiatWJQKBgEX8lox5nRSanhh6rIuA8DPjmmi5ix7xRs0avm7seXuQppK1R6G2\nmcTCY/mb2+Pa/vi6uuCHtZJGDaqfal+pyCr2GZp8CtapMS4hocJs37C5ozUguld+\nVIXsp4voRkQybsw5lWxHYloVxNu0vEuQDlmJabAWmNZ3OcbhnUSeTyFxAoGAFtkZ\n0owCHChwoT11Gt4jsBgwL/avE27DWigm92Y6eWOQeDsalupAyjmAQenu9Itqrgml\ni6egMu/KSQ0Xnmas86CqmC5XwWxQ9mS31BRA96u2/ky+t7pfej+RSDNCZiEuPbvk\noy4g78G+GvdbktWbH20X6dn3K0Bm6RG4w4yCa5UCgYBs0zAVs0DZmM8SUZJA/HuQ\nN6a1vKKns7xKw5N3SmX1KbDhx5LSZXfbUo2+QktE7iRf9G2f1o0q8kz9l/4AGXi1\nKJNUHupWoaQzGNrzAb27TUtFA0ocMG8KnqxjANWox5oPJS9OU5tw5H5dxeI/Senc\nkYW6eCnRzPcmBqex6Vuw4w==\n-----END PRIVATE KEY-----\n" + }, availability_zone="nova", 
snapshot_id="", source_volume_id="", @@ -323,7 +328,7 @@ def test_multiple_volumes_mixed(self): is_bootable=False, is_multiattach=False, attachments=[], - metadata={"admin_password": "secret123"}, + metadata={"admin_password": "Tr0ub4dor3xKq9vLmZ"}, availability_zone="nova", snapshot_id="", source_volume_id="", @@ -371,7 +376,7 @@ def test_volume_metadata_key_correct_identification(self): metadata={ "environment": "production", "application": "web-app", - "db_password": "supersecret123", + "db_password": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U", "region": "us-east", }, availability_zone="nova", @@ -404,3 +409,64 @@ def test_volume_metadata_key_correct_identification(self): # Verify the secret is correctly attributed to 'db_password' key assert "in metadata key 'db_password'" in result[0].status_extended assert result[0].resource_id == "vol-6" + + def test_volume_verified_secret_escalates_to_critical(self): + """Test that a confirmed live secret escalates the finding to CRITICAL (FAIL).""" + blockstorage_client = mock.MagicMock() + blockstorage_client.audit_config = {"secrets_validate": True} + blockstorage_client.volumes = [ + VolumeResource( + id="vol-verified", + name="Verified Secret", + status="in-use", + size=100, + volume_type="standard", + is_encrypted=False, + is_bootable=False, + is_multiattach=False, + attachments=[], + metadata={"api_key": "placeholder"}, + availability_zone="nova", + snapshot_id="", + source_volume_id="", + project_id=OPENSTACK_PROJECT_ID, + region=OPENSTACK_REGION, + ) + ] + + with ( + mock.patch( + "prowler.providers.common.provider.Provider.get_global_provider", + return_value=set_mocked_openstack_provider(), + ), + mock.patch( + "prowler.providers.openstack.services.blockstorage.blockstorage_volume_metadata_sensitive_data.blockstorage_volume_metadata_sensitive_data.blockstorage_client", + new=blockstorage_client, + ), + mock.patch( + 
"prowler.providers.openstack.services.blockstorage.blockstorage_volume_metadata_sensitive_data.blockstorage_volume_metadata_sensitive_data.detect_secrets_scan_batch", + return_value={ + 0: [ + { + "type": "JSON Web Token (base64url-encoded)", + "line_number": 2, + "filename": "data", + "hashed_secret": "x", + "is_verified": True, + } + ] + }, + ) as mock_scan, + ): + from prowler.providers.openstack.services.blockstorage.blockstorage_volume_metadata_sensitive_data.blockstorage_volume_metadata_sensitive_data import ( + blockstorage_volume_metadata_sensitive_data, + ) + + check = blockstorage_volume_metadata_sensitive_data() + result = check.execute() + + assert len(result) == 1 + assert result[0].status == "FAIL" + assert result[0].check_metadata.Severity == Severity.critical + assert "confirmed to be live" in result[0].status_extended + assert mock_scan.call_args.kwargs.get("validate") is True diff --git a/tests/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data_test.py b/tests/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data_test.py index 174a8ab83f4..790f8aa0972 100644 --- a/tests/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data_test.py +++ b/tests/providers/openstack/services/compute/compute_instance_metadata_sensitive_data/compute_instance_metadata_sensitive_data_test.py @@ -2,6 +2,7 @@ from unittest import mock +from prowler.lib.check.models import Severity from prowler.providers.openstack.services.compute.compute_service import ComputeInstance from tests.providers.openstack.openstack_fixtures import ( OPENSTACK_PROJECT_ID, @@ -181,7 +182,7 @@ def test_instance_password_in_metadata(self): private_v6="", networks={}, has_config_drive=False, - metadata={"db_password": "supersecret123"}, + metadata={"db_password": "Tr0ub4dor3xKq9vLmZ"}, user_data="", 
trusted_image_certificates=[], ) @@ -233,7 +234,9 @@ def test_instance_api_key_in_metadata(self): private_v6="", networks={}, has_config_drive=False, - metadata={"api_key": "sk-1234567890"}, + metadata={ + "api_key": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U" + }, user_data="", trusted_image_certificates=[], ) @@ -349,7 +352,9 @@ def test_instance_private_key_in_metadata(self): private_v6="", networks={}, has_config_drive=False, - metadata={"ssh_key": "-----BEGIN RSA PRIVATE KEY-----"}, + metadata={ + "ssh_key": "-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQCUzlT9QGi8ZSr5\nk+LTRz/1TaiCCs6o1icW4cur0Q0hdBnbRJXUdjlQsgzmBvCBNkGHI8hb/RUPssvc\nDLU5kOQ3Wp2KgtbphhZ2PfpuJrzwHL1ejcJkRxegm/aTdmpoQKcxGeehAfHbmlLA\nxdfn6wPDfGji973yiRH56JRukJAaqF50HC2a/AVNC5HtZoVlbQ+WvVbYVUnPxNkv\nPpc53PjrBgWiTtdMONEqJ3jDiaqfUBt+TZYF0CFc9HgjnUniRX28OukDyLu+idOz\nFKyZxMXtqexkAvQLDW1PATpZgVQ7hJoCD8UVTXAtcgzPq5fA6AR2URiECHI6ZyL0\nUmixKfMNAgMBAAECggEAJRzp5wjdpmEgDQOkjpfGXJ6sAJUD8mmI8cTKeJWIzhdo\nDH8oVEdRJ65kl6lS6hMXWEZlJgYyrsnj3MPBnjQkKycbRCy6P59s8jwmfbsFI+iz\nFUZLXZm6i5jicGhYBRzc5hrlIYu73863RXOClAnSFDsu6K6rzfYASQFIJeRBwJfs\njqXinuun/h2zGjpiY+TtNsa8c+nC7f3sGsTzNJugDvBPWQzsnAMzXJqiyharre4V\no157XIOvdC0joIp8j/Ib1ZtMfz1K1LcgBgw0szSieIw0Rq8yQ0Ek7GtLh43jG+ap\nvcSEesTD1p4mjPXoWkPG8KYd4iwGedZaePfheVcKKQKBgQDNE03SWv18AH0d4fpB\nlFAtRybCfSvMORzBrt2oilz8wDmK+Zga5o+phCnM8v3eJy1v8BvIQ9RvwQA2uVgZ\nr701wNMpVrTsMujk83oVRhimZLk6Hyw07wmMgEHX7+izkm2Lk4Lk7Zol3VRfnWG6\nmIcUk7xB1yAs3mudsfx0VO0QyQKBgQC5wfdqCLj2hZk4sMZu8Bth+BHKChGItmDk\nAW7aNt+gaPyoryOJoi2OUO8ud8EyuqXiuslSk2pPtjvLhCppkoq6V8kmPAUzaxFk\n4nDEAxT9Un8IJ0j2ebv+koQKsBWjssbVSjrZgIcYIDK1QblgbCp2FSE3ima+V8ip\nOdNjiatWJQKBgEX8lox5nRSanhh6rIuA8DPjmmi5ix7xRs0avm7seXuQppK1R6G2\nmcTCY/mb2+Pa/vi6uuCHtZJGDaqfal+pyCr2GZp8CtapMS4hocJs37C5ozUguld+\nVIXsp4voRkQybsw5lWxHYloVxNu0vEuQDlmJabAWmNZ3OcbhnUSeTyFxAoGAFtkZ\n0owCHChwoT11Gt4jsBgwL/avE27DWigm92Y6eWOQeDsalupAyjmAQenu9Itqrgml
\ni6egMu/KSQ0Xnmas86CqmC5XwWxQ9mS31BRA96u2/ky+t7pfej+RSDNCZiEuPbvk\noy4g78G+GvdbktWbH20X6dn3K0Bm6RG4w4yCa5UCgYBs0zAVs0DZmM8SUZJA/HuQ\nN6a1vKKns7xKw5N3SmX1KbDhx5LSZXfbUo2+QktE7iRf9G2f1o0q8kz9l/4AGXi1\nKJNUHupWoaQzGNrzAb27TUtFA0ocMG8KnqxjANWox5oPJS9OU5tw5H5dxeI/Senc\nkYW6eCnRzPcmBqex6Vuw4w==\n-----END PRIVATE KEY-----\n" + }, user_data="", trusted_image_certificates=[], ) @@ -431,7 +436,7 @@ def test_multiple_instances_mixed(self): private_v6="", networks={}, has_config_drive=False, - metadata={"admin_password": "secret123"}, + metadata={"admin_password": "Tr0ub4dor3xKq9vLmZ"}, user_data="", trusted_image_certificates=[], ), @@ -486,7 +491,7 @@ def test_instance_multiple_metadata_keys_correct_identification(self): metadata={ "environment": "production", "application": "web-app", - "db_password": "supersecret123", + "db_password": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U", "region": "us-east", }, user_data="", @@ -544,7 +549,7 @@ def test_instance_metadata_key_ordering(self): has_config_drive=False, metadata={ "first_key": "safe_value", - "api_key": "sk-1234567890abcdef", + "api_key": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U", "third_key": "also_safe", }, user_data="", @@ -574,3 +579,71 @@ def test_instance_metadata_key_ordering(self): # Verify the secret is correctly attributed to 'api_key' key (second in order) assert "in metadata key 'api_key'" in result[0].status_extended assert result[0].resource_id == "instance-8" + + def test_instance_verified_secret_escalates_to_critical(self): + """Test that a confirmed live secret escalates the finding to CRITICAL (FAIL).""" + compute_client = mock.MagicMock() + compute_client.audit_config = {"secrets_validate": True} + compute_client.instances = [ + ComputeInstance( + id="instance-verified", + name="Verified Secret", + status="ACTIVE", + flavor_id="flavor-1", + 
security_groups=["default"], + region=OPENSTACK_REGION, + project_id=OPENSTACK_PROJECT_ID, + is_locked=False, + locked_reason="", + key_name="", + user_id="", + access_ipv4="", + access_ipv6="", + public_v4="", + public_v6="", + private_v4="", + private_v6="", + networks={}, + has_config_drive=False, + metadata={"api_key": "placeholder"}, + user_data="", + trusted_image_certificates=[], + ) + ] + + with ( + mock.patch( + "prowler.providers.common.provider.Provider.get_global_provider", + return_value=set_mocked_openstack_provider(), + ), + mock.patch( + "prowler.providers.openstack.services.compute.compute_instance_metadata_sensitive_data.compute_instance_metadata_sensitive_data.compute_client", + new=compute_client, + ), + mock.patch( + "prowler.providers.openstack.services.compute.compute_instance_metadata_sensitive_data.compute_instance_metadata_sensitive_data.detect_secrets_scan_batch", + return_value={ + 0: [ + { + "type": "JSON Web Token (base64url-encoded)", + "line_number": 2, + "filename": "data", + "hashed_secret": "x", + "is_verified": True, + } + ] + }, + ) as mock_scan, + ): + from prowler.providers.openstack.services.compute.compute_instance_metadata_sensitive_data.compute_instance_metadata_sensitive_data import ( + compute_instance_metadata_sensitive_data, + ) + + check = compute_instance_metadata_sensitive_data() + result = check.execute() + + assert len(result) == 1 + assert result[0].status == "FAIL" + assert result[0].check_metadata.Severity == Severity.critical + assert "confirmed to be live" in result[0].status_extended + assert mock_scan.call_args.kwargs.get("validate") is True diff --git a/tests/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data_test.py b/tests/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data_test.py index 6cae6432c73..eb922742320 100644 --- 
a/tests/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data_test.py +++ b/tests/providers/openstack/services/objectstorage/objectstorage_container_metadata_sensitive_data/objectstorage_container_metadata_sensitive_data_test.py @@ -2,6 +2,7 @@ from unittest import mock +from prowler.lib.check.models import Severity from prowler.providers.openstack.services.objectstorage.objectstorage_service import ( ObjectStorageContainer, ) @@ -157,7 +158,7 @@ def test_container_password_in_metadata(self): history_location="", sync_to="", sync_key="", - metadata={"db_password": "supersecret123"}, + metadata={"db_password": "Tr0ub4dor3xKq9vLmZ"}, ) ] @@ -217,7 +218,7 @@ def test_multiple_containers_mixed(self): history_location="", sync_to="", sync_key="", - metadata={"admin_password": "secret123"}, + metadata={"admin_password": "Tr0ub4dor3xKq9vLmZ"}, ), ] @@ -241,3 +242,63 @@ def test_multiple_containers_mixed(self): assert len(result) == 2 assert len([r for r in result if r.status == "PASS"]) == 1 assert len([r for r in result if r.status == "FAIL"]) == 1 + + def test_container_verified_secret_escalates_to_critical(self): + """Test that a confirmed live secret escalates the finding to CRITICAL (FAIL).""" + objectstorage_client = mock.MagicMock() + objectstorage_client.audit_config = {"secrets_validate": True} + objectstorage_client.containers = [ + ObjectStorageContainer( + id="container-verified", + name="verified-secret", + region=OPENSTACK_REGION, + project_id=OPENSTACK_PROJECT_ID, + object_count=0, + bytes_used=0, + read_ACL="", + write_ACL="", + versioning_enabled=False, + versions_location="", + history_location="", + sync_to="", + sync_key="", + metadata={"api_key": "placeholder"}, + ) + ] + + with ( + mock.patch( + "prowler.providers.common.provider.Provider.get_global_provider", + return_value=set_mocked_openstack_provider(), + ), + mock.patch( + 
"prowler.providers.openstack.services.objectstorage.objectstorage_container_metadata_sensitive_data.objectstorage_container_metadata_sensitive_data.objectstorage_client", + new=objectstorage_client, + ), + mock.patch( + "prowler.providers.openstack.services.objectstorage.objectstorage_container_metadata_sensitive_data.objectstorage_container_metadata_sensitive_data.detect_secrets_scan_batch", + return_value={ + 0: [ + { + "type": "JSON Web Token (base64url-encoded)", + "line_number": 2, + "filename": "data", + "hashed_secret": "x", + "is_verified": True, + } + ] + }, + ) as mock_scan, + ): + from prowler.providers.openstack.services.objectstorage.objectstorage_container_metadata_sensitive_data.objectstorage_container_metadata_sensitive_data import ( + objectstorage_container_metadata_sensitive_data, + ) + + check = objectstorage_container_metadata_sensitive_data() + result = check.execute() + + assert len(result) == 1 + assert result[0].status == "FAIL" + assert result[0].check_metadata.Severity == Severity.critical + assert "confirmed to be live" in result[0].status_extended + assert mock_scan.call_args.kwargs.get("validate") is True diff --git a/uv.lock b/uv.lock index efd7008d204..3365c13f177 100644 --- a/uv.lock +++ b/uv.lock @@ -1795,19 +1795,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/84/d0/205d54408c08b13550c733c4b85429e7ead111c7f0014309637425520a9a/deprecated-1.3.1-py2.py3-none-any.whl", hash = "sha256:597bfef186b6f60181535a29fbe44865ce137a5079f295b479886c82729d5f3f", size = 11298, upload-time = "2025-10-30T08:19:00.758Z" }, ] -[[package]] -name = "detect-secrets" -version = "1.5.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pyyaml" }, - { name = "requests" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/69/67/382a863fff94eae5a0cf05542179169a1c49a4c8784a9480621e2066ca7d/detect_secrets-1.5.0.tar.gz", hash = "sha256:6bb46dcc553c10df51475641bb30fd69d25645cc12339e46c824c1e0c388898a", 
size = 97351, upload-time = "2024-05-06T17:46:19.721Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4e/5e/4f5fe4b89fde1dc3ed0eb51bd4ce4c0bca406246673d370ea2ad0c58d747/detect_secrets-1.5.0-py3-none-any.whl", hash = "sha256:e24e7b9b5a35048c313e983f76c4bd09dad89f045ff059e354f9943bf45aa060", size = 120341, upload-time = "2024-05-06T17:46:16.628Z" }, -] - [[package]] name = "dill" version = "0.4.1" @@ -2469,6 +2456,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/72/eb/698dc17e4beb315f83a47d47be128b8a63303dc8b7e7c31110410e10a68b/keystoneauth1-5.14.0-py3-none-any.whl", hash = "sha256:f8c503a95fdd83b5b72736657e4ffbb53d4b28b01763f23013f0294ed8a0e4b9", size = 343268, upload-time = "2026-05-13T09:09:24.573Z" }, ] +[[package]] +name = "kingfisher-bin" +version = "1.104.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/2b/324212f1baf482a7d4b66a2edf33073336735b67bb6b04a38d18fd9e67fb/kingfisher_bin-1.104.0-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:8e3840e67004a971fef80aba240ee5c3c5f7a3a343a6d1083a2751aaf866d5d3", size = 14057606, upload-time = "2026-06-22T03:03:01.419Z" }, + { url = "https://files.pythonhosted.org/packages/21/0a/cbf964da5102657cb9be4a59db7c9f7807ef88f9419673b7486daba785d3/kingfisher_bin-1.104.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:b838313411fa2166a318a45aec2cfcc238e2f30f5292e309ca1129a73180c851", size = 12468386, upload-time = "2026-06-22T03:03:03.951Z" }, + { url = "https://files.pythonhosted.org/packages/0b/a0/cc7ef0ac28f147cdfc9d80e4239fff11c1329831c6f57510c929e848753c/kingfisher_bin-1.104.0-py3-none-manylinux_2_17_aarch64.musllinux_1_2_aarch64.whl", hash = "sha256:0a94abbf2154ef8a3b4845cc0240e2321cdc19e0f5c7f585ea5252e76b242f68", size = 13943188, upload-time = "2026-06-22T03:03:06.378Z" }, + { url = 
"https://files.pythonhosted.org/packages/17/79/827cfd7787885798a00b5ab905bdc866ef6f8deeff0f708679b06bc9baaa/kingfisher_bin-1.104.0-py3-none-manylinux_2_17_x86_64.musllinux_1_2_x86_64.whl", hash = "sha256:f381274b946f7f68ed72911770fff72024f2192c6e2e2158f2a7fbfda8c482fb", size = 14757594, upload-time = "2026-06-22T03:03:08.66Z" }, + { url = "https://files.pythonhosted.org/packages/da/93/b0061fc69cd10382f647f9266823f213fd0b3f168f8b5bd9151a2370abb1/kingfisher_bin-1.104.0-py3-none-win_amd64.whl", hash = "sha256:f228d0dd61a738673b1c536e965a5661a83b1ee6ca64186a46ba6ea81ab4fd0b", size = 27697957, upload-time = "2026-06-22T03:03:11.268Z" }, + { url = "https://files.pythonhosted.org/packages/a5/fb/f062665b4eb3f77e799cb6335e56bc2945aea83787888a6c1ab329858d0a/kingfisher_bin-1.104.0-py3-none-win_arm64.whl", hash = "sha256:a7774d9d11815ca946bd80b8c9df0f1d39c36cb5a21def3323b99d148dc63065", size = 26063704, upload-time = "2026-06-22T03:03:14.08Z" }, +] + [[package]] name = "kubernetes" version = "32.0.1" @@ -3605,12 +3605,12 @@ dependencies = [ { name = "dash" }, { name = "dash-bootstrap-components" }, { name = "defusedxml" }, - { name = "detect-secrets" }, { name = "dulwich" }, { name = "google-api-python-client" }, { name = "google-auth-httplib2" }, { name = "h2" }, { name = "jsonschema" }, + { name = "kingfisher-bin" }, { name = "kubernetes" }, { name = "linode-api4" }, { name = "markdown" }, @@ -3714,12 +3714,12 @@ requires-dist = [ { name = "dash", specifier = "==3.1.1" }, { name = "dash-bootstrap-components", specifier = "==2.0.3" }, { name = "defusedxml", specifier = "==0.7.1" }, - { name = "detect-secrets", specifier = "==1.5.0" }, { name = "dulwich", specifier = "==1.2.5" }, { name = "google-api-python-client", specifier = "==2.163.0" }, { name = "google-auth-httplib2", specifier = "==0.2.0" }, { name = "h2", specifier = "==4.3.0" }, { name = "jsonschema", specifier = "==4.23.0" }, + { name = "kingfisher-bin", specifier = "==1.104.0" }, { name = "kubernetes", specifier = 
"==32.0.1" }, { name = "linode-api4", specifier = "==5.45.0" }, { name = "markdown", specifier = "==3.10.2" },