diff --git a/.env.dist b/.env.dist index 97c524e..3a41fd8 100644 --- a/.env.dist +++ b/.env.dist @@ -2,6 +2,5 @@ AWS_ACCESS_KEY_ID= AWS_PROFILE= AWS_SECRET_ACCESS_KEY= AWS_SESSION_TOKEN= -PULUMI_ACCESS_TOKEN= PULUMI_BACKEND_URL= PULUMI_SECRETS_PROVIDER= diff --git a/.github/actions/load-aws-ci-env/action.yml b/.github/actions/load-aws-ci-env/action.yml new file mode 100644 index 0000000..efa7d03 --- /dev/null +++ b/.github/actions/load-aws-ci-env/action.yml @@ -0,0 +1,240 @@ +name: Load AWS Secrets Manager CI environment +description: >- + Assumes a GitHub OIDC AWS config-read role, loads CI configuration from + AWS Secrets Manager, and validates required deployment variables. +inputs: + environment: + description: Fixed CI configuration suffix such as test-pr, test, prod-preview, or prod. + required: true + purpose: + description: Human-readable reason for loading this configuration. + required: false + default: "Pulumi deployment" + required-keys: + description: Newline-delimited environment variables that must exist in the JSON secret. + required: true + config-role-arn: + description: Non-secret AWS IAM role ARN trusted by GitHub OIDC to read this CI config secret. + required: true + aws-region: + description: AWS region containing the AWS Secrets Manager CI config secret. + required: true +outputs: + aws-account-id: + description: AWS account ID loaded from the CI config secret. + value: ${{ steps.collect.outputs.aws-account-id }} + aws-region: + description: AWS region loaded from the CI config secret. + value: ${{ steps.collect.outputs.aws-region }} + aws-preview-role-arn: + description: Preview role ARN loaded from the CI config secret. + value: ${{ steps.collect.outputs.aws-preview-role-arn }} + aws-apply-role-arn: + description: Apply role ARN loaded from the CI config secret. + value: ${{ steps.collect.outputs.aws-apply-role-arn }} + aws-drift-role-arn: + description: Drift role ARN loaded from the CI config secret. 
+ value: ${{ steps.collect.outputs.aws-drift-role-arn }} + aws-operations-alert-triage-role-arn: + description: Operations alert triage role ARN loaded from the CI config secret. + value: ${{ steps.collect.outputs.aws-operations-alert-triage-role-arn }} + pulumi-backend-url: + description: Pulumi backend URL loaded from the CI config secret. + value: ${{ steps.collect.outputs.pulumi-backend-url }} + pulumi-secrets-provider: + description: Pulumi KMS secrets provider loaded from the CI config secret. + value: ${{ steps.collect.outputs.pulumi-secrets-provider }} +runs: + using: composite + steps: + - name: Resolve AWS Secrets Manager target + id: aws-target + shell: bash + env: + CI_CONFIG_ENVIRONMENT: ${{ inputs.environment }} + CI_CONFIG_ROLE_ARN: ${{ inputs.config-role-arn }} + CI_CONFIG_AWS_REGION: ${{ inputs.aws-region }} + run: | + set -euo pipefail + python3 - <<'PY' + import os + import re + import sys + + valid_suffixes = {"test-pr", "test", "prod-preview", "prod"} + suffix = os.environ["CI_CONFIG_ENVIRONMENT"].strip() + if suffix not in valid_suffixes: + print(f"Unsupported CI configuration suffix: {suffix}", file=sys.stderr) + sys.exit(1) + + role_arn = os.environ["CI_CONFIG_ROLE_ARN"].strip() + role_match = re.fullmatch(r"arn:aws:iam::(\d{12}):role/[A-Za-z0-9+=,.@_/-]+", role_arn) + if not role_match: + print("config-role-arn must be an AWS IAM role ARN.", file=sys.stderr) + sys.exit(1) + + region = os.environ["CI_CONFIG_AWS_REGION"].strip() + if not re.fullmatch(r"[a-z]{2}-[a-z]+-\d", region): + print("aws-region must look like an AWS region, for example us-east-1.", file=sys.stderr) + sys.exit(1) + + repository = os.environ.get("GITHUB_REPOSITORY", "") + if "/" not in repository: + print("GITHUB_REPOSITORY must be present.", file=sys.stderr) + sys.exit(1) + repo_name = repository.split("/", 1)[1] + repo_slug = re.sub(r"[^a-z0-9-]+", "-", repo_name.lower()).strip("-") + if not repo_slug: + print("Unable to derive repository slug for CI secret ID.", 
file=sys.stderr) + sys.exit(1) + + secret_id = f"/{repo_slug}/ci/{suffix}" + with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as output: + output.write(f"secret_id={secret_id}\n") + output.write(f"config_account_id={role_match.group(1)}\n") + output.write(f"aws_region={region}\n") + PY + + - name: Record AWS Secrets Manager source-of-truth boundary + shell: bash + env: + CI_CONFIG_SECRET_ID: ${{ steps.aws-target.outputs.secret_id }} + CI_CONFIG_PURPOSE: ${{ inputs.purpose }} + run: | + set -euo pipefail + { + echo "### AWS Secrets Manager CI configuration" + echo "- Secret ID: \`${CI_CONFIG_SECRET_ID}\`" + echo "- Purpose: ${CI_CONFIG_PURPOSE}" + echo "- Source of truth: AWS Secrets Manager" + echo "- Pulumi Cloud/ESC: not used" + echo "- Secret values are not printed in workflow logs" + } >> "${GITHUB_STEP_SUMMARY}" + + - name: Configure AWS config-read credentials + uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 + with: + role-to-assume: ${{ inputs.config-role-arn }} + role-session-name: gha-ci-config-${{ github.run_id }} + aws-region: ${{ steps.aws-target.outputs.aws_region }} + allowed-account-ids: ${{ steps.aws-target.outputs.config_account_id }} + + - name: Load AWS Secrets Manager CI values + id: load + shell: bash + env: + CI_CONFIG_SECRET_ID: ${{ steps.aws-target.outputs.secret_id }} + CI_CONFIG_ACCOUNT_ID: ${{ steps.aws-target.outputs.config_account_id }} + REQUIRED_KEYS: ${{ inputs.required-keys }} + run: | + set -euo pipefail + secret_file="$(mktemp "${RUNNER_TEMP:-/tmp}/ci-config.XXXXXX")" + cleanup() { + if command -v shred >/dev/null 2>&1; then + shred -u "${secret_file}" 2>/dev/null || rm -f "${secret_file}" + else + rm -f "${secret_file}" + fi + } + trap cleanup EXIT + + aws secretsmanager get-secret-value \ + --secret-id "${CI_CONFIG_SECRET_ID}" \ + --query SecretString \ + --output text > "${secret_file}" + + python3 - "${secret_file}" <<'PY' + import json + import os + import sys + from pathlib 
import Path + + secret_file = Path(sys.argv[1]) + try: + payload = json.loads(secret_file.read_text(encoding="utf-8")) + except json.JSONDecodeError as exc: + print(f"CI config secret is not valid JSON: {exc}", file=sys.stderr) + sys.exit(1) + + if not isinstance(payload, dict): + print("CI config secret must be a JSON object.", file=sys.stderr) + sys.exit(1) + + required = [ + key.strip() + for line in os.environ["REQUIRED_KEYS"].splitlines() + for key in line.split(",") + if key.strip() + ] + missing = [key for key in required if key not in payload or str(payload[key]).strip() == ""] + if missing: + print("CI config secret is missing required keys: " + ", ".join(sorted(missing)), file=sys.stderr) + sys.exit(1) + + exports: dict[str, str] = {} + for key in required: + value = payload[key] + if isinstance(value, bool): + text = "true" if value else "false" + elif isinstance(value, (int, float, str)): + text = str(value).strip() + else: + print(f"CI config value {key} must be scalar.", file=sys.stderr) + sys.exit(1) + if "\n" in text or "\r" in text: + print(f"CI config value {key} must not contain newlines.", file=sys.stderr) + sys.exit(1) + exports[key] = text + + expected_account_id = os.environ["CI_CONFIG_ACCOUNT_ID"] + actual_account_id = exports.get("AWS_ACCOUNT_ID") + if actual_account_id and actual_account_id != expected_account_id: + print( + "AWS_ACCOUNT_ID in CI config secret does not match config-read role account.", + file=sys.stderr, + ) + sys.exit(1) + + secret_id = os.environ["CI_CONFIG_SECRET_ID"] + with open(os.environ["GITHUB_ENV"], "a", encoding="utf-8") as env: + env.write(f"CI_CONFIG_SECRET_ID={secret_id}\n") + for key, value in exports.items(): + env.write(f"{key}={value}\n") + + with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as output: + output.write("loaded=true\n") + PY + + - name: Install uv for validation + uses: astral-sh/setup-uv@22695119d769bdb6f7032ad67b9bca0ef8c4a174 + with: + version: "0.8.14" + enable-cache: true + + 
- name: Validate AWS Secrets Manager CI environment + shell: bash + env: + CI_CONFIG_SECRET_ID: ${{ steps.aws-target.outputs.secret_id }} + CI_CONFIG_PURPOSE: ${{ inputs.purpose }} + REQUIRED_KEYS: ${{ inputs.required-keys }} + run: | + set -euo pipefail + uv run python scripts/validate_ci_environment.py \ + --purpose "${CI_CONFIG_PURPOSE}" \ + --required-keys "${REQUIRED_KEYS}" + + - name: Collect CI configuration outputs + id: collect + shell: bash + run: | + set -euo pipefail + { + echo "aws-account-id=${AWS_ACCOUNT_ID}" + echo "aws-region=${AWS_REGION}" + echo "aws-preview-role-arn=${AWS_PREVIEW_ROLE_ARN:-}" + echo "aws-apply-role-arn=${AWS_APPLY_ROLE_ARN:-}" + echo "aws-drift-role-arn=${AWS_DRIFT_ROLE_ARN:-}" + echo "aws-operations-alert-triage-role-arn=${AWS_OPERATIONS_ALERT_TRIAGE_ROLE_ARN:-}" + echo "pulumi-backend-url=${PULUMI_BACKEND_URL:-}" # optional: only exported when listed in required-keys; must not trip set -u + echo "pulumi-secrets-provider=${PULUMI_SECRETS_PROVIDER:-}" # optional: only exported when listed in required-keys; must not trip set -u + } >> "${GITHUB_OUTPUT}" diff --git a/.github/github-actions-secrets.md b/.github/github-actions-secrets.md index d08fc1c..199aa9d 100644 --- a/.github/github-actions-secrets.md +++ b/.github/github-actions-secrets.md @@ -1,60 +1,26 @@ -# GitHub Actions Secrets for Pulumi Workflows +# GitHub Actions Secrets -This repository uses GitHub OIDC and GitHub environment-scoped configuration -for AWS-backed Pulumi workflows. Do not add long-lived AWS access keys for -preview, apply, drift, or IAM validation jobs. +Privileged Pulumi workflows load account-local CI configuration directly from +AWS Secrets Manager through GitHub OIDC. Pulumi Cloud and Pulumi ESC are not +used. -## Environment Configuration +Repository variables: -Configure account-specific values under **Settings -> Environments**: +- `AWS_TEST_REGION` +- `AWS_TEST_PR_CI_CONFIG_ROLE_ARN` +- `AWS_TEST_CI_CONFIG_ROLE_ARN` +- `AWS_PROD_REGION` +- `AWS_PROD_PREVIEW_CI_CONFIG_ROLE_ARN` +- `AWS_PROD_CI_CONFIG_ROLE_ARN` -- `test` for trusted PR previews, test apply, and test drift. 
-- `prod-preview` for production preview and production drift. -- `prod` for production apply only. +AWS Secrets Manager secret IDs: -Each privileged environment should define these variables as applicable: +- `/bootstrap-infrastructure/ci/test-pr` +- `/bootstrap-infrastructure/ci/test` +- `/bootstrap-infrastructure/ci/prod-preview` +- `/bootstrap-infrastructure/ci/prod` -| Variable | Purpose | -| --- | --- | -| `AWS_ACCOUNT_ID` | Expected AWS account for `allowed-account-ids` | -| `AWS_REGION` | AWS region for OIDC and Pulumi | -| `AWS_PREVIEW_ROLE_ARN` | Preview and IAM validation role | -| `AWS_APPLY_ROLE_ARN` | Apply role for `test` and `prod` | -| `AWS_DRIFT_ROLE_ARN` | Drift role | -| `AWS_OPERATIONS_ALERT_TRIAGE_ROLE_ARN` | Dedicated role for operations alert issue triage | -| `PULUMI_BACKEND_URL` | Account-local Pulumi backend | -| `PULUMI_SECRETS_PROVIDER` | AWS KMS Pulumi secrets provider URI | -| `PULUMI_PREVIEW_STACKS` | Explicit preview stack list | -| `PULUMI_DRIFT_STACKS` | Explicit drift stack list | -| `PULUMI_PR_BACKEND_URL` | Optional backend used only by trusted PR previews | -| `PULUMI_PR_PREVIEW_STACKS` | Optional stack list used only by trusted PR previews | - -Use `PULUMI_ACCESS_TOKEN` only as an environment secret when the selected -backend is Pulumi Cloud. Self-managed S3 backends do not need it. - -## OIDC Trust - -OIDC roles should trust the repository and the target GitHub environment: - -```text -repo:VilnaCRM-Org/bootstrap-infrastructure:environment: -``` - -Use `allowed-account-ids: ${{ env.AWS_ACCOUNT_ID }}` in -`aws-actions/configure-aws-credentials` with `AWS_ACCOUNT_ID` populated from the -GitHub environment through job-level `env:`. That keeps the assumed account -preflight-validated and prevents a workflow from assuming a role in the wrong -account. Store role ARNs as job or workflow environment variables, not -repository-wide variables, when they differ by account or purpose. 
-The operations alert triage role should also trust only -`.github/workflows/operations-alert-triage.yml` on the protected main branch. - -## Production Protection - -The `prod` environment must require reviewers and deployment branch -restrictions before production apply is enabled. Production preview runs through -`prod-preview`; production apply verifies the same commit SHA before using the -saved Pulumi plan. - -Release and template-sync credentials that are not AWS account-specific can -remain repository or organization secrets. +Use the [AWS Secrets Manager CI cutover manual](../docs/aws-secrets-manager-ci-cutover.md) +for setup and cleanup. Do not store account IDs, role ARNs, Pulumi backend URLs, +stack lists, or KMS secrets-provider URIs in GitHub Environment variables after +AWS-only CI is green. diff --git a/.github/workflows/github-environment-legacy-cleanup.yml b/.github/workflows/github-environment-legacy-cleanup.yml new file mode 100644 index 0000000..96f7352 --- /dev/null +++ b/.github/workflows/github-environment-legacy-cleanup.yml @@ -0,0 +1,144 @@ +name: GitHub Environment Legacy Variable Cleanup + +on: + workflow_dispatch: + inputs: + dry_run: + description: Report variables that would be deleted without changing GitHub + required: true + type: boolean + default: true + confirmation: + description: Type the documented confirmation sentence + required: true + type: string + +concurrency: + group: github-environment-legacy-variable-cleanup + cancel-in-progress: false + +permissions: + contents: read + +jobs: + cleanup: + name: Remove Legacy Account Variables + runs-on: ubuntu-latest + timeout-minutes: 10 + env: + GH_REPO: ${{ github.repository }} + GH_ENVIRONMENT_ADMIN_TOKEN: ${{ secrets.GH_ENVIRONMENT_ADMIN_TOKEN }} + DRY_RUN: ${{ inputs.dry_run }} + CONFIRMATION: ${{ inputs.confirmation }} + steps: + - name: Remove legacy GitHub Environment variables + shell: bash + run: | + set -euo pipefail + + expected="I confirm AWS Secrets Manager-backed 
privileged CI is green and legacy GitHub Environment variables can be removed" + if [[ "${CONFIRMATION}" != "${expected}" ]]; then + echo "error: confirmation sentence does not match the documented value." >&2 + exit 1 + fi + + if [[ -z "${GH_ENVIRONMENT_ADMIN_TOKEN}" ]]; then + echo "error: GH_ENVIRONMENT_ADMIN_TOKEN is required for GitHub Environment variable cleanup." >&2 + exit 1 + fi + export GH_TOKEN="${GH_ENVIRONMENT_ADMIN_TOKEN}" + + if [[ "${DRY_RUN}" != "true" && "${DRY_RUN}" != "false" ]]; then + echo "error: dry_run must be true or false." >&2 + exit 1 + fi + + environments=( + "test" + "prod-preview" + "prod" + ) + legacy_variables=( + "AWS_ACCOUNT_ID" + "AWS_APPLY_ROLE_ARN" + "AWS_DRIFT_ROLE_ARN" + "AWS_OPERATIONS_ALERT_TRIAGE_ROLE_ARN" + "AWS_PREVIEW_ROLE_ARN" + "AWS_REGION" + "OPERATIONS_ALERT_QUEUE_NAME" + "OPERATIONS_CLOUDTRAIL_NAME" + "OPERATIONS_TOPIC_ARN" + "PULUMI_BACKEND_URL" + "PULUMI_DRIFT_STACKS" + "PULUMI_PR_BACKEND_URL" + "PULUMI_PR_PREVIEW_STACKS" + "PULUMI_PREVIEW_STACKS" + "PULUMI_SECRETS_PROVIDER" + ) + + list_environment_variables() { + local environment_name="$1" + gh variable list \ + --repo "${GH_REPO}" \ + --env "${environment_name}" \ + --json name \ + --jq '.[].name' + } + + overall_status=0 + for environment_name in "${environments[@]}"; do + echo "::group::${environment_name}" + if ! existing_names="$(list_environment_variables "${environment_name}")"; then + if [[ "${DRY_RUN}" == "true" ]]; then + echo "::notice::GitHub Environment '${environment_name}' is not readable; skipping." + else + echo "error: GitHub Environment '${environment_name}' is not readable." >&2 + overall_status=1 + fi + echo "::endgroup::" + continue + fi + + found_legacy="false" + for variable_name in "${legacy_variables[@]}"; do + if ! grep -Fxq "${variable_name}" <<< "${existing_names}"; then + continue + fi + found_legacy="true" + + if [[ "${DRY_RUN}" == "true" ]]; then + echo "::notice::Would delete ${variable_name} from ${environment_name}." 
+ continue + fi + + if gh variable delete "${variable_name}" \ + --repo "${GH_REPO}" \ + --env "${environment_name}"; then + echo "::notice::Deleted ${variable_name} from ${environment_name}." + else + echo "error: failed to delete ${variable_name} from ${environment_name}." >&2 + overall_status=1 + fi + done + + if [[ "${found_legacy}" == "false" ]]; then + echo "::notice::No allowlisted legacy variables found in ${environment_name}." + fi + + if [[ "${DRY_RUN}" == "false" ]]; then + if ! remaining_names="$(list_environment_variables "${environment_name}")"; then + echo "error: could not verify ${environment_name} after cleanup." >&2 + overall_status=1 + else + for variable_name in "${legacy_variables[@]}"; do + if grep -Fxq "${variable_name}" <<< "${remaining_names}"; then + echo "error: ${variable_name} still exists in ${environment_name}." >&2 + overall_status=1 + fi + done + fi + fi + echo "::endgroup::" + done + + exit "${overall_status}" diff --git a/.github/workflows/nightly-guardrails.yml b/.github/workflows/nightly-guardrails.yml index ad84fdb..eaf37f8 100644 --- a/.github/workflows/nightly-guardrails.yml +++ b/.github/workflows/nightly-guardrails.yml @@ -24,25 +24,28 @@ jobs: concurrency: group: bootstrap-infrastructure-test-state cancel-in-progress: false - environment: test permissions: contents: read id-token: write env: PULUMI_SKIP_UPDATE_CHECK: "true" - AWS_ACCOUNT_ID: ${{ vars.AWS_ACCOUNT_ID }} - AWS_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DEFAULT_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DRIFT_ROLE_ARN: ${{ vars.AWS_DRIFT_ROLE_ARN }} - PULUMI_BACKEND_URL: ${{ vars.PULUMI_BACKEND_URL }} - PULUMI_SECRETS_PROVIDER: ${{ vars.PULUMI_SECRETS_PROVIDER }} - PULUMI_DRIFT_STACKS: ${{ vars.PULUMI_DRIFT_STACKS }} - PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_ACCESS_TOKEN }} steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 with: persist-credentials: false + - name: Load test AWS CI configuration + id: ci_config + 
uses: ./.github/actions/load-aws-ci-env + with: + environment: test + config-role-arn: ${{ vars.AWS_TEST_CI_CONFIG_ROLE_ARN }} + aws-region: ${{ vars.AWS_TEST_REGION }} + purpose: test drift + required-keys: >- + AWS_ACCOUNT_ID,AWS_REGION,AWS_DRIFT_ROLE_ARN, + PULUMI_BACKEND_URL,PULUMI_SECRETS_PROVIDER,PULUMI_DRIFT_STACKS + - name: Validate drift detection prerequisites run: | missing=() @@ -76,17 +79,19 @@ jobs: - name: Configure AWS credentials via OIDC uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 with: - role-to-assume: ${{ env.AWS_DRIFT_ROLE_ARN }} + role-to-assume: ${{ steps.ci_config.outputs.aws-drift-role-arn }} role-session-name: gha-test-drift-${{ github.run_id }} - aws-region: ${{ env.AWS_REGION }} - allowed-account-ids: ${{ env.AWS_ACCOUNT_ID }} + aws-region: ${{ steps.ci_config.outputs.aws-region }} + allowed-account-ids: ${{ steps.ci_config.outputs.aws-account-id }} - name: Emit drift evidence run: | account_id="$(aws sts get-caller-identity --query Account --output text)" - echo "GitHub environment: test" + echo "CI config secret: ${CI_CONFIG_SECRET_ID}" + echo "Backend type: s3" echo "Commit SHA: ${GITHUB_SHA}" - echo "AWS account ID: ${account_id}" + echo "Expected AWS account ID: ${AWS_ACCOUNT_ID}" + echo "Current AWS account ID: ${account_id}" echo "Pulumi stacks: ${PULUMI_DRIFT_STACKS}" echo "Role purpose: drift" @@ -103,25 +108,28 @@ jobs: concurrency: group: bootstrap-infrastructure-prod-state cancel-in-progress: false - environment: prod-preview permissions: contents: read id-token: write env: PULUMI_SKIP_UPDATE_CHECK: "true" - AWS_ACCOUNT_ID: ${{ vars.AWS_ACCOUNT_ID }} - AWS_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DEFAULT_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DRIFT_ROLE_ARN: ${{ vars.AWS_DRIFT_ROLE_ARN }} - PULUMI_BACKEND_URL: ${{ vars.PULUMI_BACKEND_URL }} - PULUMI_SECRETS_PROVIDER: ${{ vars.PULUMI_SECRETS_PROVIDER }} - PULUMI_DRIFT_STACKS: ${{ vars.PULUMI_DRIFT_STACKS 
}} - PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_ACCESS_TOKEN }} steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 with: persist-credentials: false + - name: Load prod preview AWS CI configuration + id: ci_config + uses: ./.github/actions/load-aws-ci-env + with: + environment: prod-preview + config-role-arn: ${{ vars.AWS_PROD_PREVIEW_CI_CONFIG_ROLE_ARN }} + aws-region: ${{ vars.AWS_PROD_REGION }} + purpose: prod drift + required-keys: >- + AWS_ACCOUNT_ID,AWS_REGION,AWS_DRIFT_ROLE_ARN, + PULUMI_BACKEND_URL,PULUMI_SECRETS_PROVIDER,PULUMI_DRIFT_STACKS + - name: Validate drift detection prerequisites run: | missing=() @@ -155,17 +163,19 @@ jobs: - name: Configure AWS credentials via OIDC uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 with: - role-to-assume: ${{ env.AWS_DRIFT_ROLE_ARN }} + role-to-assume: ${{ steps.ci_config.outputs.aws-drift-role-arn }} role-session-name: gha-prod-drift-${{ github.run_id }} - aws-region: ${{ env.AWS_REGION }} - allowed-account-ids: ${{ env.AWS_ACCOUNT_ID }} + aws-region: ${{ steps.ci_config.outputs.aws-region }} + allowed-account-ids: ${{ steps.ci_config.outputs.aws-account-id }} - name: Emit drift evidence run: | account_id="$(aws sts get-caller-identity --query Account --output text)" - echo "GitHub environment: prod-preview" + echo "CI config secret: ${CI_CONFIG_SECRET_ID}" + echo "Backend type: s3" echo "Commit SHA: ${GITHUB_SHA}" - echo "AWS account ID: ${account_id}" + echo "Expected AWS account ID: ${AWS_ACCOUNT_ID}" + echo "Current AWS account ID: ${account_id}" echo "Pulumi stacks: ${PULUMI_DRIFT_STACKS}" echo "Role purpose: drift" diff --git a/.github/workflows/operations-alert-backfill.yml b/.github/workflows/operations-alert-backfill.yml new file mode 100644 index 0000000..f42868c --- /dev/null +++ b/.github/workflows/operations-alert-backfill.yml @@ -0,0 +1,199 @@ +name: Operations Alert Canonical Backfill + +on: + workflow_dispatch: + inputs: + stable_event_json: + 
description: SRE-confirmed stable EventBridge JSON object + required: true + default: >- + {"source":"aws.backup","detailType":"Backup Job State Change","state":"","resourceArn":"","detail":{},"resources":[]} + type: string + message_count: + description: Legacy alert message count represented by this backfill + required: true + default: '1' + type: string + queue_name: + description: Operations alert queue name + required: true + default: bootstrap-test-operations-alerts + type: string + aws_account_id: + description: AWS account ID from SRE-confirmed alert metadata + required: true + default: '891377212104' + type: string + aws_region: + description: AWS region from SRE-confirmed alert metadata + required: true + default: eu-central-1 + type: string + confirmation: + description: Type the documented confirmation sentence + required: true + type: string + sre_confirmation_reference: + description: HTTPS URL for the SRE confirmation comment or ticket + required: true + type: string + +concurrency: + group: operations-alert-canonical-backfill + cancel-in-progress: false + +permissions: + contents: read + issues: write + +jobs: + backfill: + name: Create Or Update Canonical Alert Issue + runs-on: ubuntu-latest + environment: operations-alert-reconcile + timeout-minutes: 10 + env: + GH_TOKEN: ${{ github.token }} + GH_REPO: ${{ github.repository }} + STABLE_EVENT_JSON: ${{ inputs.stable_event_json }} + MESSAGE_COUNT: ${{ inputs.message_count }} + OPERATIONS_ALERT_QUEUE_NAME: ${{ inputs.queue_name }} + AWS_ACCOUNT_ID: ${{ inputs.aws_account_id }} + AWS_REGION: ${{ inputs.aws_region }} + CONFIRMATION: ${{ inputs.confirmation }} + SRE_CONFIRMATION_REFERENCE: ${{ inputs.sre_confirmation_reference }} + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 + with: + persist-credentials: false + + - name: Create or update canonical operations alert issue + run: | + expected="I confirm these stable fields represent the canonical operations alert stream" + 
if [[ "${CONFIRMATION}" != "${expected}" ]]; then + echo "error: confirmation sentence does not match the documented value." >&2 + exit 1 + fi + if [[ ! "${SRE_CONFIRMATION_REFERENCE}" =~ ^https://[^[:space:]]+$ ]]; then + echo "error: sre_confirmation_reference must be an HTTPS URL without whitespace." >&2 + exit 1 + fi + if [[ ! "${AWS_ACCOUNT_ID}" =~ ^[0-9]{12}$ ]]; then + echo "error: aws_account_id must be a 12-digit AWS account ID." >&2 + exit 1 + fi + if [[ ! "${AWS_REGION}" =~ ^[a-z]{2}-[a-z]+-[0-9]$ ]]; then + echo "error: aws_region must look like an AWS region." >&2 + exit 1 + fi + if [[ ! "${MESSAGE_COUNT}" =~ ^[1-9][0-9]*$ ]]; then + echo "error: message_count must be a positive integer." >&2 + exit 1 + fi + + stable_event_file="$(mktemp)" + alerts_json="$(mktemp)" + body_file="$(mktemp)" + fingerprint_file="$(mktemp)" + issue_body_file="$(mktemp)" + + printf '%s\n' "${STABLE_EVENT_JSON}" | + jq -e ' + def has_text($name): + (.[$name] // "") | type == "string" and length > 0; + if type != "object" then + error("stable_event_json must be an object") + elif (has_text("source") | not) then + error("stable_event_json.source is required") + elif ((.detailType // .detail_type // "") | type != "string" or length == 0) then + error("stable_event_json.detailType is required") + elif (has_text("state") | not) then + error("stable_event_json.state is required") + elif (((.resourceArn // .resource_arn // "") | type != "string") or ((.resourceArn // .resource_arn // "") | length == 0)) then + error("stable_event_json.resourceArn is required") + elif ((.detail // {}) | type != "object") then + error("stable_event_json.detail must be an object") + elif ((.resources // []) | type != "array") then + error("stable_event_json.resources must be an array") + else + . 
+ end' > "${stable_event_file}" + + jq -n \ + --arg reviewed_at "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \ + --slurpfile stable_event "${stable_event_file}" \ + '{ + Messages: [ + { + MessageId: "backfill-sre-confirmed", + Attributes: {SentTimestamp: "backfill"}, + Body: ({ + MessageId: "backfill-sre-confirmed", + Timestamp: $reviewed_at, + Message: (($stable_event[0]) as $event | { + source: $event.source, + "detail-type": ($event.detailType // $event.detail_type), + time: $reviewed_at, + resources: ($event.resources // []), + detail: ( + ($event.detail // {}) + { + state: $event.state, + backupVaultName: ( + $event.backupVaultName // $event.backup_vault_name // "" + ), + backupPlanId: ( + $event.backupPlanId // $event.backup_plan_id // "" + ), + backupRuleId: ( + $event.backupRuleId // $event.backup_rule_id // "" + ), + resourceArn: ($event.resourceArn // $event.resource_arn) + } + | with_entries(select(.value != "")) + ) + } | tostring) + } | tostring) + } + ] + }' > "${alerts_json}" + + python3 scripts/operations_alert_triage.py \ + --alerts-json "${alerts_json}" \ + --queue-name "${OPERATIONS_ALERT_QUEUE_NAME}" \ + --account-id "${AWS_ACCOUNT_ID}" \ + --region "${AWS_REGION}" \ + --body-file "${body_file}" \ + --fingerprint-file "${fingerprint_file}" + + fingerprint="$(cat "${fingerprint_file}")" + { + cat "${body_file}" + printf '\nCanonical backfill: this issue was created from SRE-confirmed stable fields for %s legacy message(s).\n' "${MESSAGE_COUNT}" + printf 'SRE confirmation reference: %s\n' "${SRE_CONFIRMATION_REFERENCE}" + } > "${issue_body_file}" + + existing_issue="$( + gh issue list \ + --repo "${GH_REPO}" \ + --state open \ + --search "operations-alert:fingerprint=${fingerprint} in:body" \ + --json number \ + --jq '.[0].number // ""' + )" + if [[ -n "${existing_issue}" ]]; then + gh issue comment \ + "${existing_issue}" \ + --repo "${GH_REPO}" \ + --body-file "${issue_body_file}" + printf 'Updated canonical operations alert issue #%s for fingerprint %s.\n' 
\ + "${existing_issue}" \ + "${fingerprint}" + else + issue_url="$( + gh issue create \ + --repo "${GH_REPO}" \ + --title "Operations alerts queued: canonical backfill (${MESSAGE_COUNT} message(s))" \ + --body-file "${issue_body_file}" + )" + printf 'Created canonical operations alert issue: %s\n' "${issue_url}" + fi diff --git a/.github/workflows/operations-alert-reconcile.yml b/.github/workflows/operations-alert-reconcile.yml new file mode 100644 index 0000000..85ef24d --- /dev/null +++ b/.github/workflows/operations-alert-reconcile.yml @@ -0,0 +1,127 @@ +name: Operations Alert Legacy Reconcile + +on: + workflow_dispatch: + inputs: + canonical_issue: + description: Canonical fingerprinted operations alert issue number + required: true + type: string + legacy_issues: + description: Space or comma separated legacy duplicate issue numbers + required: true + type: string + confirmation: + description: Type the documented confirmation sentence + required: true + type: string + sre_confirmation_reference: + description: HTTPS URL for the SRE confirmation comment or ticket + required: true + type: string + +concurrency: + group: operations-alert-legacy-reconcile + cancel-in-progress: false + +permissions: + contents: read + issues: write + +jobs: + reconcile: + name: Close Confirmed Legacy Duplicates + runs-on: ubuntu-latest + environment: operations-alert-reconcile + timeout-minutes: 10 + env: + GH_TOKEN: ${{ github.token }} + GH_REPO: ${{ github.repository }} + CANONICAL_ISSUE: ${{ inputs.canonical_issue }} + LEGACY_ISSUES: ${{ inputs.legacy_issues }} + CONFIRMATION: ${{ inputs.confirmation }} + SRE_CONFIRMATION_REFERENCE: ${{ inputs.sre_confirmation_reference }} + steps: + - name: Reconcile legacy operations alerts + shell: bash + run: | + expected="I confirm these legacy issues match the canonical operations alert stream" + if [[ "${CONFIRMATION}" != "${expected}" ]]; then + echo "error: confirmation sentence does not match the documented value." 
>&2 + exit 1 + fi + sre_reference="${SRE_CONFIRMATION_REFERENCE}" + if [[ ! "${sre_reference}" =~ ^https://[^[:space:]]+$ ]]; then + echo "error: sre_confirmation_reference must be an HTTPS URL without whitespace." >&2 + exit 1 + fi + + canonical="${CANONICAL_ISSUE#\#}" + if [[ ! "${canonical}" =~ ^[0-9]+$ ]]; then + echo "error: canonical_issue must be an issue number." >&2 + exit 1 + fi + + normalized="${LEGACY_ISSUES//,/ }" + declare -A seen_issues=() + legacy_issue_ids=() + for raw_issue in ${normalized}; do + issue="${raw_issue#\#}" + if [[ ! "${issue}" =~ ^[0-9]+$ ]]; then + echo "error: legacy issue '${raw_issue}' is not an issue number." >&2 + exit 1 + fi + if [[ -n "${seen_issues[${issue}]:-}" ]]; then + continue + fi + seen_issues["${issue}"]=1 + legacy_issue_ids+=("${issue}") + done + if [[ "${#legacy_issue_ids[@]}" -eq 0 ]]; then + echo "error: provide at least one legacy issue number." >&2 + exit 1 + fi + + canonical_state="$(gh issue view "${canonical}" --repo "${GH_REPO}" --json state --jq '.state')" + canonical_title="$(gh issue view "${canonical}" --repo "${GH_REPO}" --json title --jq '.title')" + canonical_body="$(gh issue view "${canonical}" --repo "${GH_REPO}" --json body --jq '.body')" + if [[ "${canonical_state}" != "OPEN" ]]; then + echo "error: canonical issue ${canonical} is not open." >&2 + exit 1 + fi + if [[ "${canonical_title}" != Operations\ alerts\ queued:* ]]; then + echo "error: canonical issue ${canonical} is not an operations alert issue." >&2 + exit 1 + fi + if [[ "${canonical_body}" != *"operations-alert:fingerprint="* ]]; then + echo "error: canonical issue must contain an operations-alert fingerprint marker in its body." >&2 + exit 1 + fi + + for issue in "${legacy_issue_ids[@]}"; do + if [[ "${issue}" == "${canonical}" ]]; then + echo "error: legacy issue ${issue} cannot be the canonical issue." 
>&2 + exit 1 + fi + + state="$(gh issue view "${issue}" --repo "${GH_REPO}" --json state --jq '.state')" + title="$(gh issue view "${issue}" --repo "${GH_REPO}" --json title --jq '.title')" + body="$(gh issue view "${issue}" --repo "${GH_REPO}" --json body --jq '.body')" + if [[ "${state}" != "OPEN" ]]; then + echo "error: legacy issue ${issue} is not open." >&2 + exit 1 + fi + if [[ "${title}" != Operations\ alerts\ queued:* ]]; then + echo "error: legacy issue ${issue} is not an operations alert issue." >&2 + exit 1 + fi + if [[ "${body}" == *"operations-alert:fingerprint="* ]]; then + echo "error: legacy issue ${issue} already has a fingerprint marker." >&2 + exit 1 + fi + + gh issue close "${issue}" \ + --repo "${GH_REPO}" \ + --duplicate-of "${canonical}" \ + --comment "Closed as a confirmed duplicate of #${canonical}. SRE confirmation was provided through the Operations Alert Legacy Reconcile workflow after matching this legacy alert to the canonical fingerprinted operations-alert stream. 
SRE confirmation reference: ${sre_reference}" + done diff --git a/.github/workflows/operations-alert-triage.yml b/.github/workflows/operations-alert-triage.yml index 8102397..c518a15 100644 --- a/.github/workflows/operations-alert-triage.yml +++ b/.github/workflows/operations-alert-triage.yml @@ -22,19 +22,29 @@ jobs: name: Triage Operations Alerts runs-on: ubuntu-latest timeout-minutes: 10 - environment: test permissions: contents: read issues: write id-token: write env: - AWS_ACCOUNT_ID: ${{ vars.AWS_ACCOUNT_ID }} - AWS_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DEFAULT_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_OPERATIONS_ALERT_TRIAGE_ROLE_ARN: ${{ vars.AWS_OPERATIONS_ALERT_TRIAGE_ROLE_ARN }} GH_TOKEN: ${{ github.token }} - OPERATIONS_ALERT_QUEUE_NAME: ${{ vars.OPERATIONS_ALERT_QUEUE_NAME || 'bootstrap-test-operations-alerts' }} steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 + with: + persist-credentials: false + + - name: Load test AWS CI configuration + id: ci_config + uses: ./.github/actions/load-aws-ci-env + with: + environment: test + config-role-arn: ${{ vars.AWS_TEST_CI_CONFIG_ROLE_ARN }} + aws-region: ${{ vars.AWS_TEST_REGION }} + purpose: operations alert triage + required-keys: >- + AWS_ACCOUNT_ID,AWS_REGION,AWS_OPERATIONS_ALERT_TRIAGE_ROLE_ARN, + OPERATIONS_ALERT_QUEUE_NAME + - name: Validate triage prerequisites run: | missing=() @@ -58,12 +68,17 @@ jobs: - name: Configure AWS credentials via OIDC uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 with: - role-to-assume: ${{ env.AWS_OPERATIONS_ALERT_TRIAGE_ROLE_ARN }} + role-to-assume: ${{ steps.ci_config.outputs.aws-operations-alert-triage-role-arn }} role-session-name: gha-operations-alert-triage-${{ github.run_id }} - aws-region: ${{ env.AWS_REGION }} - allowed-account-ids: ${{ env.AWS_ACCOUNT_ID }} + aws-region: ${{ steps.ci_config.outputs.aws-region }} + allowed-account-ids: ${{ 
steps.ci_config.outputs.aws-account-id }} + + - name: Install uv for triage renderer + run: | + python3 -m pip install --user 'uv==0.9.21' + echo "${HOME}/.local/bin" >> "${GITHUB_PATH}" - - name: Create GitHub issue for queued operations alerts + - name: Create or update GitHub issue for queued operations alerts env: GITHUB_REPOSITORY_NAME: ${{ github.repository }} run: | @@ -85,6 +100,7 @@ jobs: --queue-url "${queue_url}" \ --max-number-of-messages 10 \ --wait-time-seconds 10 \ + --visibility-timeout 600 \ --attribute-names SentTimestamp \ --output json > "${page_json}" @@ -110,68 +126,59 @@ jobs: printf 'Reached receive page limit after collecting %s operations alert message(s).\n' "${alert_count}" fi - body_file="$(mktemp)" - { - printf 'The operations alert queue contains %s message(s).\n\n' "${alert_count}" - printf 'This issue intentionally records sanitized metadata only. Use the message IDs, event source, detail type, event time, CloudTrail, and linked runbooks for investigation; do not paste raw alert payloads, stack exports, credentials, tokens, or private incident notes.\n\n' - printf 'Queue: %s\n' "${OPERATIONS_ALERT_QUEUE_NAME}" - printf 'AWS account: %s\n' "${AWS_ACCOUNT_ID}" - printf 'AWS region: %s\n\n' "${AWS_REGION}" - printf 'Messages:\n' - python - "${alerts_json}" <<'PY' - import json - import sys - - - def load_json(value): - if isinstance(value, dict): - return value - if not isinstance(value, str) or not value: - return {} - try: - loaded = json.loads(value) - except json.JSONDecodeError: - return {} - return loaded if isinstance(loaded, dict) else {} - - - def safe_value(value): - if value in (None, ""): - return "unknown" - text = str(value).replace("`", "'").replace("\n", " ") - return text[:200] - - - with open(sys.argv[1], encoding="utf-8") as alerts_file: - alerts = json.load(alerts_file) - - for message in alerts.get("Messages", []): - sns = load_json(message.get("Body")) - event = load_json(sns.get("Message")) - attributes = 
message.get("Attributes") or {} - fields = { - "sqsMessageId": message.get("MessageId"), - "snsMessageId": sns.get("MessageId"), - "sentTimestamp": attributes.get("SentTimestamp"), - "eventSource": event.get("source"), - "detailType": event.get("detail-type"), - "eventTime": event.get("time") or sns.get("Timestamp"), - } - rendered = ", ".join( - f"{name}: `{safe_value(value)}`" - for name, value in fields.items() - ) - print(f"- {rendered}") - PY - } > "${body_file}" - - issue_url="$( - gh issue create \ - --repo "${GITHUB_REPOSITORY_NAME}" \ - --title "Operations alerts queued: ${alert_count} message(s)" \ - --body-file "${body_file}" - )" - printf 'Created operations alert issue: %s\n' "${issue_url}" + aggregate_body_file="$(mktemp)" + aggregate_fingerprint_file="$(mktemp)" + groups_file="$(mktemp)" + uv run python scripts/operations_alert_triage.py \ + --alerts-json "${alerts_json}" \ + --queue-name "${OPERATIONS_ALERT_QUEUE_NAME}" \ + --account-id "${AWS_ACCOUNT_ID}" \ + --region "${AWS_REGION}" \ + --body-file "${aggregate_body_file}" \ + --fingerprint-file "${aggregate_fingerprint_file}" \ + --groups-file "${groups_file}" + + group_count="$(jq '.groups | length' "${groups_file}")" + for ((group_index = 0; group_index < group_count; group_index++)); do + group_alerts_json="$(mktemp)" + body_file="$(mktemp)" + fingerprint_file="$(mktemp)" + jq ".groups[${group_index}].alerts" "${groups_file}" > "${group_alerts_json}" + group_alert_count="$(jq '.Messages | length' "${group_alerts_json}")" + uv run python scripts/operations_alert_triage.py \ + --alerts-json "${group_alerts_json}" \ + --queue-name "${OPERATIONS_ALERT_QUEUE_NAME}" \ + --account-id "${AWS_ACCOUNT_ID}" \ + --region "${AWS_REGION}" \ + --body-file "${body_file}" \ + --fingerprint-file "${fingerprint_file}" + fingerprint="$(cat "${fingerprint_file}")" + existing_issue="$( + gh issue list \ + --repo "${GITHUB_REPOSITORY_NAME}" \ + --state open \ + --search "operations-alert:fingerprint=${fingerprint} 
in:body" \ + --json number \ + --jq '.[0].number // ""' + )" + if [[ -n "${existing_issue}" ]]; then + gh issue comment \ + "${existing_issue}" \ + --repo "${GITHUB_REPOSITORY_NAME}" \ + --body-file "${body_file}" + printf 'Updated operations alert issue #%s for fingerprint %s.\n' \ + "${existing_issue}" \ + "${fingerprint}" + else + issue_url="$( + gh issue create \ + --repo "${GITHUB_REPOSITORY_NAME}" \ + --title "Operations alerts queued: ${group_alert_count} message(s)" \ + --body-file "${body_file}" + )" + printf 'Created operations alert issue: %s\n' "${issue_url}" + fi + done jq -r '.Messages[].ReceiptHandle' "${alerts_json}" | while IFS= read -r receipt_handle; do diff --git a/.github/workflows/pulumi-pr-command-runner.yml b/.github/workflows/pulumi-pr-command-runner.yml index c8df75d..2f45050 100644 --- a/.github/workflows/pulumi-pr-command-runner.yml +++ b/.github/workflows/pulumi-pr-command-runner.yml @@ -134,26 +134,29 @@ jobs: needs: - preflight timeout-minutes: 25 - environment: test permissions: contents: read id-token: write env: PULUMI_SKIP_UPDATE_CHECK: "true" - AWS_ACCOUNT_ID: ${{ vars.AWS_ACCOUNT_ID }} - AWS_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DEFAULT_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_PREVIEW_ROLE_ARN: ${{ vars.AWS_PREVIEW_ROLE_ARN }} - PULUMI_BACKEND_URL: ${{ vars.PULUMI_BACKEND_URL || vars.PULUMI_PR_BACKEND_URL }} - PULUMI_SECRETS_PROVIDER: ${{ vars.PULUMI_SECRETS_PROVIDER }} - PULUMI_PREVIEW_STACKS: ${{ vars.PULUMI_PREVIEW_STACKS || vars.PULUMI_PR_PREVIEW_STACKS }} - PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_ACCESS_TOKEN }} steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 with: ref: ${{ needs.preflight.outputs.head_sha }} persist-credentials: false + - name: Load test AWS CI configuration + id: ci_config + uses: ./.github/actions/load-aws-ci-env + with: + environment: test + config-role-arn: ${{ vars.AWS_TEST_CI_CONFIG_ROLE_ARN }} + aws-region: ${{ vars.AWS_TEST_REGION }} + 
purpose: PR test preview + required-keys: >- + AWS_ACCOUNT_ID,AWS_REGION,AWS_PREVIEW_ROLE_ARN, + PULUMI_BACKEND_URL,PULUMI_SECRETS_PROVIDER,PULUMI_PREVIEW_STACKS + - name: Validate test preview prerequisites run: | missing=() @@ -187,17 +190,19 @@ jobs: - name: Configure AWS preview credentials via OIDC uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 with: - role-to-assume: ${{ env.AWS_PREVIEW_ROLE_ARN }} + role-to-assume: ${{ steps.ci_config.outputs.aws-preview-role-arn }} role-session-name: gha-pr-test-preview-${{ github.run_id }} - aws-region: ${{ env.AWS_REGION }} - allowed-account-ids: ${{ env.AWS_ACCOUNT_ID }} + aws-region: ${{ steps.ci_config.outputs.aws-region }} + allowed-account-ids: ${{ steps.ci_config.outputs.aws-account-id }} - name: Emit test preview evidence run: | account_id="$(aws sts get-caller-identity --query Account --output text)" - echo "GitHub environment: test" + echo "CI config secret: ${CI_CONFIG_SECRET_ID}" + echo "Backend type: s3" echo "Commit SHA: ${{ needs.preflight.outputs.head_sha }}" - echo "AWS account ID: ${account_id}" + echo "Expected AWS account ID: ${AWS_ACCOUNT_ID}" + echo "Current AWS account ID: ${account_id}" echo "Pulumi stacks: ${PULUMI_PREVIEW_STACKS}" echo "Role purpose: preview" echo "PR command: ${{ needs.preflight.outputs.display_command }}" @@ -272,33 +277,36 @@ jobs: - test_preview - test_destructive_diff timeout-minutes: 15 - environment: test permissions: contents: read id-token: write env: PULUMI_SKIP_UPDATE_CHECK: "true" - AWS_ACCOUNT_ID: ${{ vars.AWS_ACCOUNT_ID }} - AWS_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DEFAULT_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_PREVIEW_ROLE_ARN: ${{ vars.AWS_PREVIEW_ROLE_ARN }} - PULUMI_BACKEND_URL: ${{ vars.PULUMI_BACKEND_URL || vars.PULUMI_PR_BACKEND_URL }} - PULUMI_SECRETS_PROVIDER: ${{ vars.PULUMI_SECRETS_PROVIDER }} - PULUMI_PREVIEW_STACKS: ${{ vars.PULUMI_PREVIEW_STACKS || vars.PULUMI_PR_PREVIEW_STACKS }} - 
PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_ACCESS_TOKEN }} steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 with: ref: ${{ needs.preflight.outputs.head_sha }} persist-credentials: false + - name: Load test AWS CI configuration + id: ci_config + uses: ./.github/actions/load-aws-ci-env + with: + environment: test + config-role-arn: ${{ vars.AWS_TEST_CI_CONFIG_ROLE_ARN }} + aws-region: ${{ vars.AWS_TEST_REGION }} + purpose: PR test IAM validation + required-keys: >- + AWS_ACCOUNT_ID,AWS_REGION,AWS_PREVIEW_ROLE_ARN, + PULUMI_BACKEND_URL,PULUMI_SECRETS_PROVIDER,PULUMI_PREVIEW_STACKS + - name: Configure AWS preview credentials via OIDC uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 with: - role-to-assume: ${{ env.AWS_PREVIEW_ROLE_ARN }} + role-to-assume: ${{ steps.ci_config.outputs.aws-preview-role-arn }} role-session-name: gha-pr-test-iam-${{ github.run_id }} - aws-region: ${{ env.AWS_REGION }} - allowed-account-ids: ${{ env.AWS_ACCOUNT_ID }} + aws-region: ${{ steps.ci_config.outputs.aws-region }} + allowed-account-ids: ${{ steps.ci_config.outputs.aws-account-id }} - name: Start development environment run: make start @@ -327,40 +335,45 @@ jobs: - test_destructive_diff - test_iam_validation timeout-minutes: 30 - environment: test permissions: contents: read id-token: write env: PULUMI_SKIP_UPDATE_CHECK: "true" - AWS_ACCOUNT_ID: ${{ vars.AWS_ACCOUNT_ID }} - AWS_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DEFAULT_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_APPLY_ROLE_ARN: ${{ vars.AWS_APPLY_ROLE_ARN || vars.AWS_PREVIEW_ROLE_ARN }} - PULUMI_BACKEND_URL: ${{ vars.PULUMI_BACKEND_URL || vars.PULUMI_PR_BACKEND_URL }} - PULUMI_SECRETS_PROVIDER: ${{ vars.PULUMI_SECRETS_PROVIDER }} - PULUMI_PREVIEW_STACKS: ${{ vars.PULUMI_PREVIEW_STACKS || vars.PULUMI_PR_PREVIEW_STACKS }} - PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_ACCESS_TOKEN }} steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 
with: ref: ${{ needs.preflight.outputs.head_sha }} persist-credentials: false + - name: Load test AWS CI configuration + id: ci_config + uses: ./.github/actions/load-aws-ci-env + with: + environment: test + config-role-arn: ${{ vars.AWS_TEST_CI_CONFIG_ROLE_ARN }} + aws-region: ${{ vars.AWS_TEST_REGION }} + purpose: PR test apply + required-keys: >- + AWS_ACCOUNT_ID,AWS_REGION,AWS_APPLY_ROLE_ARN, + PULUMI_BACKEND_URL,PULUMI_SECRETS_PROVIDER,PULUMI_PREVIEW_STACKS + - name: Configure AWS apply credentials via OIDC uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 with: - role-to-assume: ${{ env.AWS_APPLY_ROLE_ARN }} + role-to-assume: ${{ steps.ci_config.outputs.aws-apply-role-arn }} role-session-name: gha-pr-test-apply-${{ github.run_id }} - aws-region: ${{ env.AWS_REGION }} - allowed-account-ids: ${{ env.AWS_ACCOUNT_ID }} + aws-region: ${{ steps.ci_config.outputs.aws-region }} + allowed-account-ids: ${{ steps.ci_config.outputs.aws-account-id }} - name: Emit test apply evidence run: | account_id="$(aws sts get-caller-identity --query Account --output text)" - echo "GitHub environment: test" + echo "CI config secret: ${CI_CONFIG_SECRET_ID}" + echo "Backend type: s3" echo "Commit SHA: ${{ needs.preflight.outputs.head_sha }}" - echo "AWS account ID: ${account_id}" + echo "Expected AWS account ID: ${AWS_ACCOUNT_ID}" + echo "Current AWS account ID: ${account_id}" echo "Pulumi stacks: ${PULUMI_PREVIEW_STACKS}" echo "Role purpose: apply" echo "PR command: ${{ needs.preflight.outputs.display_command }}" @@ -380,20 +393,7 @@ jobs: env: GITHUB_TOKEN: ${{ github.token }} PULUMI_EXPECTED_SHA: ${{ needs.preflight.outputs.head_sha }} - run: | - set +e - make pulumi-up-plan 2>&1 | tee .artifacts/pulumi-plan/apply.log - apply_rc="${PIPESTATUS[0]}" - set -e - if [[ "${apply_rc}" -eq 0 ]]; then - exit 0 - fi - if grep -Fq "decrypting secret value: cipher: message authentication failed" .artifacts/pulumi-plan/apply.log; then - echo "::warning::Saved 
Pulumi plan failed with the known KMS plan-decrypt error; retrying guarded direct test apply after preview, destructive diff, and IAM validation gates." - make pulumi-up - exit 0 - fi - exit "${apply_rc}" + run: make pulumi-up-plan test_post_apply_drift: name: Test Post-Apply Drift @@ -406,33 +406,36 @@ jobs: - preflight - test_apply timeout-minutes: 20 - environment: test permissions: contents: read id-token: write env: PULUMI_SKIP_UPDATE_CHECK: "true" - AWS_ACCOUNT_ID: ${{ vars.AWS_ACCOUNT_ID }} - AWS_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DEFAULT_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DRIFT_ROLE_ARN: ${{ vars.AWS_DRIFT_ROLE_ARN || vars.AWS_PREVIEW_ROLE_ARN }} - PULUMI_BACKEND_URL: ${{ vars.PULUMI_BACKEND_URL || vars.PULUMI_PR_BACKEND_URL }} - PULUMI_SECRETS_PROVIDER: ${{ vars.PULUMI_SECRETS_PROVIDER }} - PULUMI_DRIFT_STACKS: ${{ vars.PULUMI_DRIFT_STACKS || vars.PULUMI_PR_PREVIEW_STACKS }} - PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_ACCESS_TOKEN }} steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 with: ref: ${{ needs.preflight.outputs.head_sha }} persist-credentials: false + - name: Load test AWS CI configuration + id: ci_config + uses: ./.github/actions/load-aws-ci-env + with: + environment: test + config-role-arn: ${{ vars.AWS_TEST_CI_CONFIG_ROLE_ARN }} + aws-region: ${{ vars.AWS_TEST_REGION }} + purpose: PR test drift + required-keys: >- + AWS_ACCOUNT_ID,AWS_REGION,AWS_DRIFT_ROLE_ARN, + PULUMI_BACKEND_URL,PULUMI_SECRETS_PROVIDER,PULUMI_DRIFT_STACKS + - name: Configure AWS drift credentials via OIDC uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 with: - role-to-assume: ${{ env.AWS_DRIFT_ROLE_ARN }} + role-to-assume: ${{ steps.ci_config.outputs.aws-drift-role-arn }} role-session-name: gha-pr-test-drift-${{ github.run_id }} - aws-region: ${{ env.AWS_REGION }} - allowed-account-ids: ${{ env.AWS_ACCOUNT_ID }} + aws-region: ${{ steps.ci_config.outputs.aws-region }} + 
allowed-account-ids: ${{ steps.ci_config.outputs.aws-account-id }} - name: Start development environment run: make start @@ -450,28 +453,30 @@ jobs: - preflight - test_post_apply_drift timeout-minutes: 25 - environment: prod-preview permissions: contents: read id-token: write env: PULUMI_SKIP_UPDATE_CHECK: "true" - AWS_ACCOUNT_ID: ${{ vars.AWS_ACCOUNT_ID }} - AWS_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DEFAULT_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_PREVIEW_ROLE_ARN: ${{ vars.AWS_PREVIEW_ROLE_ARN }} - AWS_DRIFT_ROLE_ARN: ${{ vars.AWS_DRIFT_ROLE_ARN }} - PULUMI_BACKEND_URL: ${{ vars.PULUMI_BACKEND_URL }} - PULUMI_SECRETS_PROVIDER: ${{ vars.PULUMI_SECRETS_PROVIDER }} - PULUMI_PREVIEW_STACKS: ${{ vars.PULUMI_PREVIEW_STACKS }} - PULUMI_DRIFT_STACKS: ${{ vars.PULUMI_DRIFT_STACKS }} - PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_ACCESS_TOKEN }} steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 with: ref: ${{ needs.preflight.outputs.head_sha }} persist-credentials: false + - name: Load prod preview AWS CI configuration + id: ci_config + uses: ./.github/actions/load-aws-ci-env + with: + environment: prod-preview + config-role-arn: ${{ vars.AWS_PROD_PREVIEW_CI_CONFIG_ROLE_ARN }} + aws-region: ${{ vars.AWS_PROD_REGION }} + purpose: PR prod preview + required-keys: >- + AWS_ACCOUNT_ID,AWS_REGION,AWS_PREVIEW_ROLE_ARN,AWS_DRIFT_ROLE_ARN, + PULUMI_BACKEND_URL,PULUMI_SECRETS_PROVIDER, + PULUMI_PREVIEW_STACKS,PULUMI_DRIFT_STACKS + - name: Validate production preview prerequisites run: | missing=() @@ -507,17 +512,19 @@ jobs: - name: Configure AWS preview credentials via OIDC uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 with: - role-to-assume: ${{ env.AWS_PREVIEW_ROLE_ARN }} + role-to-assume: ${{ steps.ci_config.outputs.aws-preview-role-arn }} role-session-name: gha-pr-prod-preview-${{ github.run_id }} - aws-region: ${{ env.AWS_REGION }} - allowed-account-ids: ${{ env.AWS_ACCOUNT_ID }} + 
aws-region: ${{ steps.ci_config.outputs.aws-region }} + allowed-account-ids: ${{ steps.ci_config.outputs.aws-account-id }} - name: Emit production preview evidence run: | account_id="$(aws sts get-caller-identity --query Account --output text)" - echo "GitHub environment: prod-preview" + echo "CI config secret: ${CI_CONFIG_SECRET_ID}" + echo "Backend type: s3" echo "Commit SHA: ${{ needs.preflight.outputs.head_sha }}" - echo "AWS account ID: ${account_id}" + echo "Expected AWS account ID: ${AWS_ACCOUNT_ID}" + echo "Current AWS account ID: ${account_id}" echo "Pulumi stacks: ${PULUMI_PREVIEW_STACKS}" echo "Role purpose: preview" echo "PR command: ${{ needs.preflight.outputs.display_command }}" @@ -594,33 +601,36 @@ jobs: - prod_preview - prod_destructive_diff timeout-minutes: 15 - environment: prod-preview permissions: contents: read id-token: write env: PULUMI_SKIP_UPDATE_CHECK: "true" - AWS_ACCOUNT_ID: ${{ vars.AWS_ACCOUNT_ID }} - AWS_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DEFAULT_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_PREVIEW_ROLE_ARN: ${{ vars.AWS_PREVIEW_ROLE_ARN }} - PULUMI_BACKEND_URL: ${{ vars.PULUMI_BACKEND_URL }} - PULUMI_SECRETS_PROVIDER: ${{ vars.PULUMI_SECRETS_PROVIDER }} - PULUMI_PREVIEW_STACKS: ${{ vars.PULUMI_PREVIEW_STACKS }} - PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_ACCESS_TOKEN }} steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 with: ref: ${{ needs.preflight.outputs.head_sha }} persist-credentials: false + - name: Load prod preview AWS CI configuration + id: ci_config + uses: ./.github/actions/load-aws-ci-env + with: + environment: prod-preview + config-role-arn: ${{ vars.AWS_PROD_PREVIEW_CI_CONFIG_ROLE_ARN }} + aws-region: ${{ vars.AWS_PROD_REGION }} + purpose: PR prod IAM validation + required-keys: >- + AWS_ACCOUNT_ID,AWS_REGION,AWS_PREVIEW_ROLE_ARN, + PULUMI_BACKEND_URL,PULUMI_SECRETS_PROVIDER,PULUMI_PREVIEW_STACKS + - name: Configure AWS preview credentials via OIDC uses: 
aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 with: - role-to-assume: ${{ env.AWS_PREVIEW_ROLE_ARN }} + role-to-assume: ${{ steps.ci_config.outputs.aws-preview-role-arn }} role-session-name: gha-pr-prod-iam-${{ github.run_id }} - aws-region: ${{ env.AWS_REGION }} - allowed-account-ids: ${{ env.AWS_ACCOUNT_ID }} + aws-region: ${{ steps.ci_config.outputs.aws-region }} + allowed-account-ids: ${{ steps.ci_config.outputs.aws-account-id }} - name: Start development environment run: make start @@ -654,14 +664,6 @@ jobs: id-token: write env: PULUMI_SKIP_UPDATE_CHECK: "true" - AWS_ACCOUNT_ID: ${{ vars.AWS_ACCOUNT_ID }} - AWS_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DEFAULT_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_APPLY_ROLE_ARN: ${{ vars.AWS_APPLY_ROLE_ARN }} - PULUMI_BACKEND_URL: ${{ vars.PULUMI_BACKEND_URL }} - PULUMI_SECRETS_PROVIDER: ${{ vars.PULUMI_SECRETS_PROVIDER }} - PULUMI_PREVIEW_STACKS: ${{ vars.PULUMI_PREVIEW_STACKS }} - PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_ACCESS_TOKEN }} steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 with: @@ -677,20 +679,35 @@ jobs: fi echo "Approved production SHA: ${checked_out_sha}" + - name: Load prod AWS CI configuration + id: ci_config + uses: ./.github/actions/load-aws-ci-env + with: + environment: prod + config-role-arn: ${{ vars.AWS_PROD_CI_CONFIG_ROLE_ARN }} + aws-region: ${{ vars.AWS_PROD_REGION }} + purpose: PR prod apply + required-keys: >- + AWS_ACCOUNT_ID,AWS_REGION,AWS_APPLY_ROLE_ARN, + PULUMI_BACKEND_URL,PULUMI_SECRETS_PROVIDER,PULUMI_PREVIEW_STACKS + - name: Configure AWS apply credentials via OIDC uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 with: - role-to-assume: ${{ env.AWS_APPLY_ROLE_ARN }} + role-to-assume: ${{ steps.ci_config.outputs.aws-apply-role-arn }} role-session-name: gha-pr-prod-apply-${{ github.run_id }} - aws-region: ${{ env.AWS_REGION }} - allowed-account-ids: ${{ 
env.AWS_ACCOUNT_ID }} + aws-region: ${{ steps.ci_config.outputs.aws-region }} + allowed-account-ids: ${{ steps.ci_config.outputs.aws-account-id }} - name: Emit production apply evidence run: | account_id="$(aws sts get-caller-identity --query Account --output text)" echo "GitHub environment: prod" + echo "CI config secret: ${CI_CONFIG_SECRET_ID}" + echo "Backend type: s3" echo "Commit SHA: ${{ needs.preflight.outputs.head_sha }}" - echo "AWS account ID: ${account_id}" + echo "Expected AWS account ID: ${AWS_ACCOUNT_ID}" + echo "Current AWS account ID: ${account_id}" echo "Pulumi stacks: ${PULUMI_PREVIEW_STACKS}" echo "Role purpose: apply" echo "PR command: ${{ needs.preflight.outputs.display_command }}" @@ -722,33 +739,36 @@ jobs: - preflight - prod_apply timeout-minutes: 20 - environment: prod-preview permissions: contents: read id-token: write env: PULUMI_SKIP_UPDATE_CHECK: "true" - AWS_ACCOUNT_ID: ${{ vars.AWS_ACCOUNT_ID }} - AWS_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DEFAULT_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DRIFT_ROLE_ARN: ${{ vars.AWS_DRIFT_ROLE_ARN }} - PULUMI_BACKEND_URL: ${{ vars.PULUMI_BACKEND_URL }} - PULUMI_SECRETS_PROVIDER: ${{ vars.PULUMI_SECRETS_PROVIDER }} - PULUMI_DRIFT_STACKS: ${{ vars.PULUMI_DRIFT_STACKS }} - PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_ACCESS_TOKEN }} steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 with: ref: ${{ needs.preflight.outputs.head_sha }} persist-credentials: false + - name: Load prod preview AWS CI configuration + id: ci_config + uses: ./.github/actions/load-aws-ci-env + with: + environment: prod-preview + config-role-arn: ${{ vars.AWS_PROD_PREVIEW_CI_CONFIG_ROLE_ARN }} + aws-region: ${{ vars.AWS_PROD_REGION }} + purpose: PR prod drift + required-keys: >- + AWS_ACCOUNT_ID,AWS_REGION,AWS_DRIFT_ROLE_ARN, + PULUMI_BACKEND_URL,PULUMI_SECRETS_PROVIDER,PULUMI_DRIFT_STACKS + - name: Configure AWS drift credentials via OIDC uses: 
aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 with: - role-to-assume: ${{ env.AWS_DRIFT_ROLE_ARN }} + role-to-assume: ${{ steps.ci_config.outputs.aws-drift-role-arn }} role-session-name: gha-pr-prod-drift-${{ github.run_id }} - aws-region: ${{ env.AWS_REGION }} - allowed-account-ids: ${{ env.AWS_ACCOUNT_ID }} + aws-region: ${{ steps.ci_config.outputs.aws-region }} + allowed-account-ids: ${{ steps.ci_config.outputs.aws-account-id }} - name: Start development environment run: make start diff --git a/.github/workflows/pulumi-pr-guardrails.yml b/.github/workflows/pulumi-pr-guardrails.yml index 3551274..1728302 100644 --- a/.github/workflows/pulumi-pr-guardrails.yml +++ b/.github/workflows/pulumi-pr-guardrails.yml @@ -47,25 +47,55 @@ jobs: needs: - preview_mode timeout-minutes: 20 - environment: test permissions: contents: read id-token: write env: PULUMI_SKIP_UPDATE_CHECK: "true" - AWS_ACCOUNT_ID: ${{ vars.AWS_ACCOUNT_ID }} - AWS_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DEFAULT_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_PREVIEW_ROLE_ARN: ${{ vars.AWS_PREVIEW_ROLE_ARN }} - PULUMI_BACKEND_URL: ${{ github.event_name == 'pull_request' && vars.PULUMI_PR_BACKEND_URL || vars.PULUMI_BACKEND_URL || vars.PULUMI_PR_BACKEND_URL }} - PULUMI_SECRETS_PROVIDER: ${{ vars.PULUMI_SECRETS_PROVIDER }} - PULUMI_PREVIEW_STACKS: ${{ github.event_name == 'pull_request' && vars.PULUMI_PR_PREVIEW_STACKS || vars.PULUMI_PREVIEW_STACKS || vars.PULUMI_PR_PREVIEW_STACKS }} - PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_ACCESS_TOKEN }} steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 with: persist-credentials: false + - name: Select test AWS CI configuration + id: ci_config_target + env: + GITHUB_EVENT_NAME: ${{ github.event_name }} + AWS_TEST_PR_CI_CONFIG_ROLE_ARN: ${{ vars.AWS_TEST_PR_CI_CONFIG_ROLE_ARN }} + AWS_TEST_CI_CONFIG_ROLE_ARN: ${{ vars.AWS_TEST_CI_CONFIG_ROLE_ARN }} + run: | + set -euo pipefail + if [[ 
"${GITHUB_EVENT_NAME}" == "pull_request" ]]; then + ci_environment="test-pr" + config_role_arn="${AWS_TEST_PR_CI_CONFIG_ROLE_ARN}" + config_variable_name="AWS_TEST_PR_CI_CONFIG_ROLE_ARN" + else + ci_environment="test" + config_role_arn="${AWS_TEST_CI_CONFIG_ROLE_ARN}" + config_variable_name="AWS_TEST_CI_CONFIG_ROLE_ARN" + fi + if [[ -z "${config_role_arn}" ]]; then + printf 'error: %s must be set for %s AWS CI configuration.\n' \ + "${config_variable_name}" "${ci_environment}" >&2 + exit 1 + fi + { + echo "environment=${ci_environment}" + echo "config-role-arn=${config_role_arn}" + } >> "${GITHUB_OUTPUT}" + + - name: Load test PR AWS CI configuration + id: ci_config + uses: ./.github/actions/load-aws-ci-env + with: + environment: ${{ steps.ci_config_target.outputs.environment }} + config-role-arn: ${{ steps.ci_config_target.outputs.config-role-arn }} + aws-region: ${{ vars.AWS_TEST_REGION }} + purpose: test preview + required-keys: >- + AWS_ACCOUNT_ID,AWS_REGION,AWS_PREVIEW_ROLE_ARN, + PULUMI_BACKEND_URL,PULUMI_SECRETS_PROVIDER,PULUMI_PREVIEW_STACKS + - name: Validate preview prerequisites run: | missing=() @@ -81,7 +111,7 @@ jobs: done if [[ "${#missing[@]}" -gt 0 ]]; then printf 'error: trusted PR guardrails are missing %s\n' "${missing[*]}" >&2 - echo "Set PULUMI_PR_BACKEND_URL/PULUMI_PR_PREVIEW_STACKS for a dedicated PR preview backend, or PULUMI_BACKEND_URL/PULUMI_PREVIEW_STACKS for the shared test stack." >&2 + echo "Populate the fixed AWS Secrets Manager CI config secret for the test PR preview or shared test stack." >&2 exit 1 fi if [[ ! 
"${AWS_ACCOUNT_ID}" =~ ^[0-9]{12}$ ]]; then @@ -100,17 +130,19 @@ jobs: - name: Configure AWS credentials via OIDC uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 with: - role-to-assume: ${{ env.AWS_PREVIEW_ROLE_ARN }} + role-to-assume: ${{ steps.ci_config.outputs.aws-preview-role-arn }} role-session-name: gha-test-preview-${{ github.run_id }} - aws-region: ${{ env.AWS_REGION }} - allowed-account-ids: ${{ env.AWS_ACCOUNT_ID }} + aws-region: ${{ steps.ci_config.outputs.aws-region }} + allowed-account-ids: ${{ steps.ci_config.outputs.aws-account-id }} - name: Emit preview evidence run: | account_id="$(aws sts get-caller-identity --query Account --output text)" - echo "GitHub environment: test" + echo "CI config secret: ${CI_CONFIG_SECRET_ID}" + echo "Backend type: s3" echo "Commit SHA: ${GITHUB_SHA}" - echo "AWS account ID: ${account_id}" + echo "Expected AWS account ID: ${AWS_ACCOUNT_ID}" + echo "Current AWS account ID: ${account_id}" echo "Pulumi stacks: ${PULUMI_PREVIEW_STACKS}" echo "Guardrail mode: privileged" echo "Role purpose: preview" @@ -203,32 +235,62 @@ jobs: needs: - preview timeout-minutes: 15 - environment: test permissions: contents: read id-token: write env: PULUMI_SKIP_UPDATE_CHECK: "true" - AWS_ACCOUNT_ID: ${{ vars.AWS_ACCOUNT_ID }} - AWS_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DEFAULT_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_PREVIEW_ROLE_ARN: ${{ vars.AWS_PREVIEW_ROLE_ARN }} - PULUMI_BACKEND_URL: ${{ vars.PULUMI_BACKEND_URL }} - PULUMI_SECRETS_PROVIDER: ${{ vars.PULUMI_SECRETS_PROVIDER }} - PULUMI_PREVIEW_STACKS: ${{ vars.PULUMI_PREVIEW_STACKS }} - PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_ACCESS_TOKEN }} steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 with: persist-credentials: false + - name: Select test AWS CI configuration + id: ci_config_target + env: + GITHUB_EVENT_NAME: ${{ github.event_name }} + AWS_TEST_PR_CI_CONFIG_ROLE_ARN: ${{ 
vars.AWS_TEST_PR_CI_CONFIG_ROLE_ARN }} + AWS_TEST_CI_CONFIG_ROLE_ARN: ${{ vars.AWS_TEST_CI_CONFIG_ROLE_ARN }} + run: | + set -euo pipefail + if [[ "${GITHUB_EVENT_NAME}" == "pull_request" ]]; then + ci_environment="test-pr" + config_role_arn="${AWS_TEST_PR_CI_CONFIG_ROLE_ARN}" + config_variable_name="AWS_TEST_PR_CI_CONFIG_ROLE_ARN" + else + ci_environment="test" + config_role_arn="${AWS_TEST_CI_CONFIG_ROLE_ARN}" + config_variable_name="AWS_TEST_CI_CONFIG_ROLE_ARN" + fi + if [[ -z "${config_role_arn}" ]]; then + printf 'error: %s must be set for %s AWS CI configuration.\n' \ + "${config_variable_name}" "${ci_environment}" >&2 + exit 1 + fi + { + echo "environment=${ci_environment}" + echo "config-role-arn=${config_role_arn}" + } >> "${GITHUB_OUTPUT}" + + - name: Load test PR AWS CI configuration + id: ci_config + uses: ./.github/actions/load-aws-ci-env + with: + environment: ${{ steps.ci_config_target.outputs.environment }} + config-role-arn: ${{ steps.ci_config_target.outputs.config-role-arn }} + aws-region: ${{ vars.AWS_TEST_REGION }} + purpose: test IAM validation + required-keys: >- + AWS_ACCOUNT_ID,AWS_REGION,AWS_PREVIEW_ROLE_ARN, + PULUMI_BACKEND_URL,PULUMI_SECRETS_PROVIDER,PULUMI_PREVIEW_STACKS + - name: Configure AWS credentials via OIDC uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 with: - role-to-assume: ${{ env.AWS_PREVIEW_ROLE_ARN }} + role-to-assume: ${{ steps.ci_config.outputs.aws-preview-role-arn }} role-session-name: gha-test-iam-validate-${{ github.run_id }} - aws-region: ${{ env.AWS_REGION }} - allowed-account-ids: ${{ env.AWS_ACCOUNT_ID }} + aws-region: ${{ steps.ci_config.outputs.aws-region }} + allowed-account-ids: ${{ steps.ci_config.outputs.aws-account-id }} - name: Start development environment run: make start diff --git a/.github/workflows/pulumi-prod.yml b/.github/workflows/pulumi-prod.yml index e8c0185..735d3da 100644 --- a/.github/workflows/pulumi-prod.yml +++ b/.github/workflows/pulumi-prod.yml @@ 
-25,7 +25,6 @@ jobs: name: Prod Preview runs-on: ubuntu-latest timeout-minutes: 25 - environment: prod-preview permissions: actions: read contents: read @@ -34,16 +33,6 @@ jobs: preview_sha: ${{ steps.resolve_sha.outputs.preview_sha }} env: PULUMI_SKIP_UPDATE_CHECK: "true" - AWS_ACCOUNT_ID: ${{ vars.AWS_ACCOUNT_ID }} - AWS_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DEFAULT_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_PREVIEW_ROLE_ARN: ${{ vars.AWS_PREVIEW_ROLE_ARN }} - AWS_DRIFT_ROLE_ARN: ${{ vars.AWS_DRIFT_ROLE_ARN }} - PULUMI_BACKEND_URL: ${{ vars.PULUMI_BACKEND_URL }} - PULUMI_SECRETS_PROVIDER: ${{ vars.PULUMI_SECRETS_PROVIDER }} - PULUMI_PREVIEW_STACKS: ${{ vars.PULUMI_PREVIEW_STACKS }} - PULUMI_DRIFT_STACKS: ${{ vars.PULUMI_DRIFT_STACKS }} - PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_ACCESS_TOKEN }} steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 with: @@ -84,6 +73,19 @@ jobs: fi echo "Pulumi Test Deploy run: ${run_id}" + - name: Load prod preview AWS CI configuration + id: ci_config + uses: ./.github/actions/load-aws-ci-env + with: + environment: prod-preview + config-role-arn: ${{ vars.AWS_PROD_PREVIEW_CI_CONFIG_ROLE_ARN }} + aws-region: ${{ vars.AWS_PROD_REGION }} + purpose: prod preview + required-keys: >- + AWS_ACCOUNT_ID,AWS_REGION,AWS_PREVIEW_ROLE_ARN,AWS_DRIFT_ROLE_ARN, + PULUMI_BACKEND_URL,PULUMI_SECRETS_PROVIDER, + PULUMI_PREVIEW_STACKS,PULUMI_DRIFT_STACKS + - name: Validate production preview prerequisites run: | missing=() @@ -119,19 +121,21 @@ jobs: - name: Configure AWS credentials via OIDC uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 with: - role-to-assume: ${{ env.AWS_PREVIEW_ROLE_ARN }} + role-to-assume: ${{ steps.ci_config.outputs.aws-preview-role-arn }} role-session-name: gha-prod-preview-${{ github.run_id }} - aws-region: ${{ env.AWS_REGION }} - allowed-account-ids: ${{ env.AWS_ACCOUNT_ID }} + aws-region: ${{ steps.ci_config.outputs.aws-region }} + 
allowed-account-ids: ${{ steps.ci_config.outputs.aws-account-id }} - name: Emit production preview evidence env: PREVIEW_SHA: ${{ steps.resolve_sha.outputs.preview_sha }} run: | account_id="$(aws sts get-caller-identity --query Account --output text)" - echo "GitHub environment: prod-preview" + echo "CI config secret: ${CI_CONFIG_SECRET_ID}" + echo "Backend type: s3" echo "Commit SHA: ${PREVIEW_SHA}" - echo "AWS account ID: ${account_id}" + echo "Expected AWS account ID: ${AWS_ACCOUNT_ID}" + echo "Current AWS account ID: ${account_id}" echo "Pulumi stacks: ${PULUMI_PREVIEW_STACKS}" echo "Role purpose: preview" @@ -188,33 +192,36 @@ jobs: needs: - preview timeout-minutes: 15 - environment: prod-preview permissions: contents: read id-token: write env: PULUMI_SKIP_UPDATE_CHECK: "true" - AWS_ACCOUNT_ID: ${{ vars.AWS_ACCOUNT_ID }} - AWS_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DEFAULT_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_PREVIEW_ROLE_ARN: ${{ vars.AWS_PREVIEW_ROLE_ARN }} - PULUMI_BACKEND_URL: ${{ vars.PULUMI_BACKEND_URL }} - PULUMI_SECRETS_PROVIDER: ${{ vars.PULUMI_SECRETS_PROVIDER }} - PULUMI_PREVIEW_STACKS: ${{ vars.PULUMI_PREVIEW_STACKS }} - PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_ACCESS_TOKEN }} steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 with: ref: ${{ needs.preview.outputs.preview_sha }} persist-credentials: false + - name: Load prod preview AWS CI configuration + id: ci_config + uses: ./.github/actions/load-aws-ci-env + with: + environment: prod-preview + config-role-arn: ${{ vars.AWS_PROD_PREVIEW_CI_CONFIG_ROLE_ARN }} + aws-region: ${{ vars.AWS_PROD_REGION }} + purpose: prod IAM validation + required-keys: >- + AWS_ACCOUNT_ID,AWS_REGION,AWS_PREVIEW_ROLE_ARN, + PULUMI_BACKEND_URL,PULUMI_SECRETS_PROVIDER,PULUMI_PREVIEW_STACKS + - name: Configure AWS credentials via OIDC uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 with: - role-to-assume: ${{ 
env.AWS_PREVIEW_ROLE_ARN }} + role-to-assume: ${{ steps.ci_config.outputs.aws-preview-role-arn }} role-session-name: gha-prod-iam-${{ github.run_id }} - aws-region: ${{ env.AWS_REGION }} - allowed-account-ids: ${{ env.AWS_ACCOUNT_ID }} + aws-region: ${{ steps.ci_config.outputs.aws-region }} + allowed-account-ids: ${{ steps.ci_config.outputs.aws-account-id }} - name: Start development environment run: make start @@ -242,14 +249,6 @@ jobs: id-token: write env: PULUMI_SKIP_UPDATE_CHECK: "true" - AWS_ACCOUNT_ID: ${{ vars.AWS_ACCOUNT_ID }} - AWS_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DEFAULT_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_APPLY_ROLE_ARN: ${{ vars.AWS_APPLY_ROLE_ARN }} - PULUMI_BACKEND_URL: ${{ vars.PULUMI_BACKEND_URL }} - PULUMI_SECRETS_PROVIDER: ${{ vars.PULUMI_SECRETS_PROVIDER }} - PULUMI_PREVIEW_STACKS: ${{ vars.PULUMI_PREVIEW_STACKS }} - PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_ACCESS_TOKEN }} steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 with: @@ -267,13 +266,25 @@ jobs: fi echo "Approved production SHA: ${PREVIEW_SHA}" + - name: Load prod AWS CI configuration + id: ci_config + uses: ./.github/actions/load-aws-ci-env + with: + environment: prod + config-role-arn: ${{ vars.AWS_PROD_CI_CONFIG_ROLE_ARN }} + aws-region: ${{ vars.AWS_PROD_REGION }} + purpose: prod apply + required-keys: >- + AWS_ACCOUNT_ID,AWS_REGION,AWS_APPLY_ROLE_ARN, + PULUMI_BACKEND_URL,PULUMI_SECRETS_PROVIDER,PULUMI_PREVIEW_STACKS + - name: Configure AWS credentials via OIDC uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 with: - role-to-assume: ${{ env.AWS_APPLY_ROLE_ARN }} + role-to-assume: ${{ steps.ci_config.outputs.aws-apply-role-arn }} role-session-name: gha-prod-apply-${{ github.run_id }} - aws-region: ${{ env.AWS_REGION }} - allowed-account-ids: ${{ env.AWS_ACCOUNT_ID }} + aws-region: ${{ steps.ci_config.outputs.aws-region }} + allowed-account-ids: ${{ 
steps.ci_config.outputs.aws-account-id }} - name: Emit production apply evidence env: @@ -281,8 +292,11 @@ jobs: run: | account_id="$(aws sts get-caller-identity --query Account --output text)" echo "GitHub environment: prod" + echo "CI config secret: ${CI_CONFIG_SECRET_ID}" + echo "Backend type: s3" echo "Commit SHA: ${PREVIEW_SHA}" - echo "AWS account ID: ${account_id}" + echo "Expected AWS account ID: ${AWS_ACCOUNT_ID}" + echo "Current AWS account ID: ${account_id}" echo "Pulumi stacks: ${PULUMI_PREVIEW_STACKS}" echo "Role purpose: apply" @@ -308,33 +322,36 @@ jobs: - apply - preview timeout-minutes: 20 - environment: prod-preview permissions: contents: read id-token: write env: PULUMI_SKIP_UPDATE_CHECK: "true" - AWS_ACCOUNT_ID: ${{ vars.AWS_ACCOUNT_ID }} - AWS_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DEFAULT_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DRIFT_ROLE_ARN: ${{ vars.AWS_DRIFT_ROLE_ARN }} - PULUMI_BACKEND_URL: ${{ vars.PULUMI_BACKEND_URL }} - PULUMI_SECRETS_PROVIDER: ${{ vars.PULUMI_SECRETS_PROVIDER }} - PULUMI_DRIFT_STACKS: ${{ vars.PULUMI_DRIFT_STACKS }} - PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_ACCESS_TOKEN }} steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 with: ref: ${{ needs.preview.outputs.preview_sha }} persist-credentials: false + - name: Load prod preview AWS CI configuration + id: ci_config + uses: ./.github/actions/load-aws-ci-env + with: + environment: prod-preview + config-role-arn: ${{ vars.AWS_PROD_PREVIEW_CI_CONFIG_ROLE_ARN }} + aws-region: ${{ vars.AWS_PROD_REGION }} + purpose: prod drift + required-keys: >- + AWS_ACCOUNT_ID,AWS_REGION,AWS_DRIFT_ROLE_ARN, + PULUMI_BACKEND_URL,PULUMI_SECRETS_PROVIDER,PULUMI_DRIFT_STACKS + - name: Configure AWS credentials via OIDC uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 with: - role-to-assume: ${{ env.AWS_DRIFT_ROLE_ARN }} + role-to-assume: ${{ steps.ci_config.outputs.aws-drift-role-arn }} role-session-name: 
gha-prod-drift-${{ github.run_id }} - aws-region: ${{ env.AWS_REGION }} - allowed-account-ids: ${{ env.AWS_ACCOUNT_ID }} + aws-region: ${{ steps.ci_config.outputs.aws-region }} + allowed-account-ids: ${{ steps.ci_config.outputs.aws-account-id }} - name: Start development environment run: make start diff --git a/.github/workflows/pulumi-test-deploy.yml b/.github/workflows/pulumi-test-deploy.yml index 8bf0100..08e554a 100644 --- a/.github/workflows/pulumi-test-deploy.yml +++ b/.github/workflows/pulumi-test-deploy.yml @@ -22,28 +22,29 @@ jobs: name: Test Preview runs-on: ubuntu-latest timeout-minutes: 25 - environment: test permissions: contents: read id-token: write env: PULUMI_SKIP_UPDATE_CHECK: "true" - AWS_ACCOUNT_ID: ${{ vars.AWS_ACCOUNT_ID }} - AWS_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DEFAULT_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_PREVIEW_ROLE_ARN: ${{ vars.AWS_PREVIEW_ROLE_ARN }} - AWS_APPLY_ROLE_ARN: ${{ vars.AWS_APPLY_ROLE_ARN || vars.AWS_PREVIEW_ROLE_ARN }} - AWS_DRIFT_ROLE_ARN: ${{ vars.AWS_DRIFT_ROLE_ARN || vars.AWS_PREVIEW_ROLE_ARN }} - PULUMI_BACKEND_URL: ${{ vars.PULUMI_BACKEND_URL || vars.PULUMI_PR_BACKEND_URL }} - PULUMI_SECRETS_PROVIDER: ${{ vars.PULUMI_SECRETS_PROVIDER }} - PULUMI_PREVIEW_STACKS: ${{ vars.PULUMI_PREVIEW_STACKS || vars.PULUMI_PR_PREVIEW_STACKS }} - PULUMI_DRIFT_STACKS: ${{ vars.PULUMI_DRIFT_STACKS || vars.PULUMI_PR_PREVIEW_STACKS }} - PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_ACCESS_TOKEN }} steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 with: persist-credentials: false + - name: Load test AWS CI configuration + id: ci_config + uses: ./.github/actions/load-aws-ci-env + with: + environment: test + config-role-arn: ${{ vars.AWS_TEST_CI_CONFIG_ROLE_ARN }} + aws-region: ${{ vars.AWS_TEST_REGION }} + purpose: test preview + required-keys: >- + AWS_ACCOUNT_ID,AWS_REGION,AWS_PREVIEW_ROLE_ARN,AWS_APPLY_ROLE_ARN, + AWS_DRIFT_ROLE_ARN,PULUMI_BACKEND_URL,PULUMI_SECRETS_PROVIDER, + 
PULUMI_PREVIEW_STACKS,PULUMI_DRIFT_STACKS + - name: Validate test deployment prerequisites run: | missing=() @@ -80,17 +81,19 @@ jobs: - name: Configure AWS credentials via OIDC uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 with: - role-to-assume: ${{ env.AWS_PREVIEW_ROLE_ARN }} + role-to-assume: ${{ steps.ci_config.outputs.aws-preview-role-arn }} role-session-name: gha-test-preflight-${{ github.run_id }} - aws-region: ${{ env.AWS_REGION }} - allowed-account-ids: ${{ env.AWS_ACCOUNT_ID }} + aws-region: ${{ steps.ci_config.outputs.aws-region }} + allowed-account-ids: ${{ steps.ci_config.outputs.aws-account-id }} - name: Emit test preview evidence run: | account_id="$(aws sts get-caller-identity --query Account --output text)" - echo "GitHub environment: test" + echo "CI config secret: ${CI_CONFIG_SECRET_ID}" + echo "Backend type: s3" echo "Commit SHA: ${GITHUB_SHA}" - echo "AWS account ID: ${account_id}" + echo "Expected AWS account ID: ${AWS_ACCOUNT_ID}" + echo "Current AWS account ID: ${account_id}" echo "Pulumi stacks: ${PULUMI_PREVIEW_STACKS}" echo "Role purpose: preview" @@ -146,32 +149,35 @@ jobs: needs: - preview timeout-minutes: 15 - environment: test permissions: contents: read id-token: write env: PULUMI_SKIP_UPDATE_CHECK: "true" - AWS_ACCOUNT_ID: ${{ vars.AWS_ACCOUNT_ID }} - AWS_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DEFAULT_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_PREVIEW_ROLE_ARN: ${{ vars.AWS_PREVIEW_ROLE_ARN }} - PULUMI_BACKEND_URL: ${{ vars.PULUMI_BACKEND_URL || vars.PULUMI_PR_BACKEND_URL }} - PULUMI_SECRETS_PROVIDER: ${{ vars.PULUMI_SECRETS_PROVIDER }} - PULUMI_PREVIEW_STACKS: ${{ vars.PULUMI_PREVIEW_STACKS || vars.PULUMI_PR_PREVIEW_STACKS }} - PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_ACCESS_TOKEN }} steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 with: persist-credentials: false + - name: Load test AWS CI configuration + id: ci_config + uses: 
./.github/actions/load-aws-ci-env + with: + environment: test + config-role-arn: ${{ vars.AWS_TEST_CI_CONFIG_ROLE_ARN }} + aws-region: ${{ vars.AWS_TEST_REGION }} + purpose: test IAM validation + required-keys: >- + AWS_ACCOUNT_ID,AWS_REGION,AWS_PREVIEW_ROLE_ARN, + PULUMI_BACKEND_URL,PULUMI_SECRETS_PROVIDER,PULUMI_PREVIEW_STACKS + - name: Configure AWS credentials via OIDC uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 with: - role-to-assume: ${{ env.AWS_PREVIEW_ROLE_ARN }} + role-to-assume: ${{ steps.ci_config.outputs.aws-preview-role-arn }} role-session-name: gha-test-iam-${{ github.run_id }} - aws-region: ${{ env.AWS_REGION }} - allowed-account-ids: ${{ env.AWS_ACCOUNT_ID }} + aws-region: ${{ steps.ci_config.outputs.aws-region }} + allowed-account-ids: ${{ steps.ci_config.outputs.aws-account-id }} - name: Start development environment run: make start @@ -193,39 +199,44 @@ jobs: - iam_validation - preview timeout-minutes: 30 - environment: test permissions: contents: read id-token: write env: PULUMI_SKIP_UPDATE_CHECK: "true" - AWS_ACCOUNT_ID: ${{ vars.AWS_ACCOUNT_ID }} - AWS_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DEFAULT_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_APPLY_ROLE_ARN: ${{ vars.AWS_APPLY_ROLE_ARN || vars.AWS_PREVIEW_ROLE_ARN }} - PULUMI_BACKEND_URL: ${{ vars.PULUMI_BACKEND_URL || vars.PULUMI_PR_BACKEND_URL }} - PULUMI_SECRETS_PROVIDER: ${{ vars.PULUMI_SECRETS_PROVIDER }} - PULUMI_PREVIEW_STACKS: ${{ vars.PULUMI_PREVIEW_STACKS || vars.PULUMI_PR_PREVIEW_STACKS }} - PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_ACCESS_TOKEN }} steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 with: persist-credentials: false + - name: Load test AWS CI configuration + id: ci_config + uses: ./.github/actions/load-aws-ci-env + with: + environment: test + config-role-arn: ${{ vars.AWS_TEST_CI_CONFIG_ROLE_ARN }} + aws-region: ${{ vars.AWS_TEST_REGION }} + purpose: test apply + required-keys: >- + 
AWS_ACCOUNT_ID,AWS_REGION,AWS_APPLY_ROLE_ARN, + PULUMI_BACKEND_URL,PULUMI_SECRETS_PROVIDER,PULUMI_PREVIEW_STACKS + - name: Configure AWS credentials via OIDC uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 with: - role-to-assume: ${{ env.AWS_APPLY_ROLE_ARN }} + role-to-assume: ${{ steps.ci_config.outputs.aws-apply-role-arn }} role-session-name: gha-test-apply-${{ github.run_id }} - aws-region: ${{ env.AWS_REGION }} - allowed-account-ids: ${{ env.AWS_ACCOUNT_ID }} + aws-region: ${{ steps.ci_config.outputs.aws-region }} + allowed-account-ids: ${{ steps.ci_config.outputs.aws-account-id }} - name: Emit test apply evidence run: | account_id="$(aws sts get-caller-identity --query Account --output text)" - echo "GitHub environment: test" + echo "CI config secret: ${CI_CONFIG_SECRET_ID}" + echo "Backend type: s3" echo "Commit SHA: ${GITHUB_SHA}" - echo "AWS account ID: ${account_id}" + echo "Expected AWS account ID: ${AWS_ACCOUNT_ID}" + echo "Current AWS account ID: ${account_id}" echo "Pulumi stacks: ${PULUMI_PREVIEW_STACKS}" echo "Role purpose: apply" @@ -242,20 +253,7 @@ jobs: env: GITHUB_TOKEN: ${{ github.token }} PULUMI_EXPECTED_SHA: ${{ github.sha }} - run: | - set +e - make pulumi-up-plan 2>&1 | tee .artifacts/pulumi-plan/apply.log - apply_rc="${PIPESTATUS[0]}" - set -e - if [[ "${apply_rc}" -eq 0 ]]; then - exit 0 - fi - if grep -Fq "decrypting secret value: cipher: message authentication failed" .artifacts/pulumi-plan/apply.log; then - echo "::warning::Saved Pulumi plan failed with the known KMS plan-decrypt error; retrying guarded direct test apply after preview, destructive diff, and IAM validation gates." 
- make pulumi-up - exit 0 - fi - exit "${apply_rc}" + run: make pulumi-up-plan post_apply_drift: name: Test Post-Apply Drift @@ -263,32 +261,35 @@ jobs: needs: - apply timeout-minutes: 20 - environment: test permissions: contents: read id-token: write env: PULUMI_SKIP_UPDATE_CHECK: "true" - AWS_ACCOUNT_ID: ${{ vars.AWS_ACCOUNT_ID }} - AWS_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DEFAULT_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DRIFT_ROLE_ARN: ${{ vars.AWS_DRIFT_ROLE_ARN || vars.AWS_PREVIEW_ROLE_ARN }} - PULUMI_BACKEND_URL: ${{ vars.PULUMI_BACKEND_URL || vars.PULUMI_PR_BACKEND_URL }} - PULUMI_SECRETS_PROVIDER: ${{ vars.PULUMI_SECRETS_PROVIDER }} - PULUMI_DRIFT_STACKS: ${{ vars.PULUMI_DRIFT_STACKS || vars.PULUMI_PR_PREVIEW_STACKS }} - PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_ACCESS_TOKEN }} steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 with: persist-credentials: false + - name: Load test AWS CI configuration + id: ci_config + uses: ./.github/actions/load-aws-ci-env + with: + environment: test + config-role-arn: ${{ vars.AWS_TEST_CI_CONFIG_ROLE_ARN }} + aws-region: ${{ vars.AWS_TEST_REGION }} + purpose: test drift + required-keys: >- + AWS_ACCOUNT_ID,AWS_REGION,AWS_DRIFT_ROLE_ARN, + PULUMI_BACKEND_URL,PULUMI_SECRETS_PROVIDER,PULUMI_DRIFT_STACKS + - name: Configure AWS credentials via OIDC uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 with: - role-to-assume: ${{ env.AWS_DRIFT_ROLE_ARN }} + role-to-assume: ${{ steps.ci_config.outputs.aws-drift-role-arn }} role-session-name: gha-test-drift-${{ github.run_id }} - aws-region: ${{ env.AWS_REGION }} - allowed-account-ids: ${{ env.AWS_ACCOUNT_ID }} + aws-region: ${{ steps.ci_config.outputs.aws-region }} + allowed-account-ids: ${{ steps.ci_config.outputs.aws-account-id }} - name: Start development environment run: make start diff --git a/.github/workflows/well-architected-evidence.yml b/.github/workflows/well-architected-evidence.yml 
index e43ff9e..535a871 100644 --- a/.github/workflows/well-architected-evidence.yml +++ b/.github/workflows/well-architected-evidence.yml @@ -50,19 +50,12 @@ jobs: needs: - evidence_mode timeout-minutes: 20 - environment: test permissions: contents: read id-token: write pull-requests: read vulnerability-alerts: read env: - AWS_ACCOUNT_ID: ${{ vars.AWS_ACCOUNT_ID }} - AWS_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_DEFAULT_REGION: ${{ vars.AWS_REGION || 'eu-central-1' }} - AWS_PREVIEW_ROLE_ARN: ${{ vars.AWS_PREVIEW_ROLE_ARN }} - OPERATIONS_TOPIC_ARN: ${{ vars.OPERATIONS_TOPIC_ARN || 'arn:aws:sns:eu-central-1:891377212104:bootstrap-test-operations' }} - OPERATIONS_CLOUDTRAIL_NAME: ${{ vars.OPERATIONS_CLOUDTRAIL_NAME || 'bootstrap-test-management-events' }} RESTORE_DRILL_EVIDENCE: specs/issue-17-well-architected-5-of-5/restore-drill-evidence-2026-04-27.json QUESTION_MATRIX_EVIDENCE: specs/issue-17-well-architected-5-of-5/question-matrix-evidence-2026-05-17.json EXTERNAL_CONTROL_EVIDENCE: specs/issue-17-well-architected-5-of-5/external-control-evidence-2026-05-17.json @@ -80,6 +73,45 @@ jobs: ref: ${{ github.event.pull_request.head.sha || github.sha }} persist-credentials: false + - name: Select test AWS CI configuration + id: ci_config_target + env: + GITHUB_EVENT_NAME: ${{ github.event_name }} + AWS_TEST_PR_CI_CONFIG_ROLE_ARN: ${{ vars.AWS_TEST_PR_CI_CONFIG_ROLE_ARN }} + AWS_TEST_CI_CONFIG_ROLE_ARN: ${{ vars.AWS_TEST_CI_CONFIG_ROLE_ARN }} + run: | + set -euo pipefail + if [[ "${GITHUB_EVENT_NAME}" == "pull_request" ]]; then + ci_environment="test-pr" + config_role_arn="${AWS_TEST_PR_CI_CONFIG_ROLE_ARN}" + config_variable_name="AWS_TEST_PR_CI_CONFIG_ROLE_ARN" + else + ci_environment="test" + config_role_arn="${AWS_TEST_CI_CONFIG_ROLE_ARN}" + config_variable_name="AWS_TEST_CI_CONFIG_ROLE_ARN" + fi + if [[ -z "${config_role_arn}" ]]; then + printf 'error: %s must be set for %s AWS CI configuration.\n' \ + "${config_variable_name}" "${ci_environment}" >&2 + 
exit 1 + fi + { + echo "environment=${ci_environment}" + echo "config-role-arn=${config_role_arn}" + } >> "${GITHUB_OUTPUT}" + + - name: Load test AWS CI configuration + id: ci_config + uses: ./.github/actions/load-aws-ci-env + with: + environment: ${{ steps.ci_config_target.outputs.environment }} + config-role-arn: ${{ steps.ci_config_target.outputs.config-role-arn }} + aws-region: ${{ vars.AWS_TEST_REGION }} + purpose: Well-Architected evidence + required-keys: >- + AWS_ACCOUNT_ID,AWS_REGION,AWS_PREVIEW_ROLE_ARN, + OPERATIONS_TOPIC_ARN,OPERATIONS_CLOUDTRAIL_NAME + - name: Validate evidence prerequisites run: | missing=() @@ -126,17 +158,19 @@ jobs: - name: Configure AWS credentials via OIDC uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 with: - role-to-assume: ${{ env.AWS_PREVIEW_ROLE_ARN }} + role-to-assume: ${{ steps.ci_config.outputs.aws-preview-role-arn }} role-session-name: gha-wa-evidence-${{ github.run_id }} - aws-region: ${{ env.AWS_REGION }} - allowed-account-ids: ${{ env.AWS_ACCOUNT_ID }} + aws-region: ${{ steps.ci_config.outputs.aws-region }} + allowed-account-ids: ${{ steps.ci_config.outputs.aws-account-id }} - name: Emit evidence context run: | account_id="$(aws sts get-caller-identity --query Account --output text)" - echo "GitHub environment: test" + echo "CI config secret: ${CI_CONFIG_SECRET_ID}" + echo "Backend type: s3" echo "Commit SHA: ${GITHUB_SHA}" - echo "AWS account ID: ${account_id}" + echo "Expected AWS account ID: ${AWS_ACCOUNT_ID}" + echo "Current AWS account ID: ${account_id}" echo "AWS region: ${AWS_REGION}" echo "Role purpose: Well-Architected evidence" diff --git a/Makefile b/Makefile index 0324f71..3829229 100644 --- a/Makefile +++ b/Makefile @@ -131,7 +131,7 @@ pulumi-plan: ## Save a reviewed Pulumi update plan for the selected stack. 
@$(COMPOSE) run --rm $(COMPOSE_GITHUB_TOKEN) $(COMPOSE_PULUMI_ENV) \ $(COMPOSE_SERVICE) $(REPO_PYTHON) ./scripts/run_pulumi_command.py plan -pulumi-up: ## Apply the current Pulumi infrastructure plan. +pulumi-up: ## Apply directly for local/admin use only; GitHub Actions must use pulumi-up-plan. @$(COMPOSE) run --rm $(COMPOSE_GITHUB_TOKEN) $(COMPOSE_PULUMI_ENV) \ $(COMPOSE_SERVICE) $(REPO_PYTHON) ./scripts/run_pulumi_command.py up @@ -409,7 +409,7 @@ report-well-architected-closeout: ## Render owner/admin Well-Architected closeou --question-verification "$${WELL_ARCHITECTED_QUESTION_VERIFICATION:-.artifacts/well-architected/question-verification.json}" \ --output "$${WELL_ARCHITECTED_CLOSEOUT_OUTPUT:-.artifacts/well-architected/owner-closeout-bundle.md}" -configure-github-repository-controls: ## Print, apply, or verify GitHub ruleset and prod environment controls. +configure-github-repository-controls: ## Print, apply, or verify GitHub ruleset and protected environment controls. $(REPO_PYTHON) ./scripts/configure_github_repository_controls.py \ --repo "$(GITHUB_REPOSITORY_CONTROLS_REPO)" \ --prod-reviewer "$(GITHUB_REPOSITORY_CONTROLS_PROD_REVIEWER)" \ diff --git a/README.md b/README.md index 54b87e6..f15eb22 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ Production-ready scaffold for teams that want to ship infrastructure-as-code wit - Pulumi (Python) starter that exports environment metadata and tagging helpers. - Reproducible Docker Compose workspace with a Pulumi-ready container and helper `make` tasks. - CI pipelines for structural, policy, preview, security, unit, integration, mutation, and CLI-level checks. -- Multi-account GitHub environment guidance for `test`, `prod-preview`, and protected `prod` workflows. +- Multi-account AWS Secrets Manager CI guidance for `test-pr`, `test`, `prod-preview`, and protected `prod` workflows. - Release and template-sync automations to keep downstream repos aligned. 
- Documentation on AWS credential management for secure automation using GitHub OIDC and short-lived credentials. @@ -130,18 +130,20 @@ single Make invocation instead of exporting it globally. Run `make doctor` when you need a fast prerequisite check before debugging local Docker or Compose behavior. -`make pulumi-preview` and `make pulumi-up` automatically enable the repository -policy pack. If the shared `uv` environment inside the container is missing -core Pulumi Python dependencies, the bootstrap helper resyncs it from -`uv.lock` before Pulumi starts. The policy runtime is refreshed separately in -`policy/.venv` from `policy/requirements.txt` so Pulumi starts the policy pack -consistently in Docker, CI, and local shells. The interactive Pulumi targets -also log into the configured backend automatically, falling back to the local -file backend under `.pulumi-backend/` when no shared backend is configured, -select the first committed `Pulumi..yaml` file by default, and expect -shared backends to use an AWS KMS-backed secrets provider. For test/prod CI -setup, configure account-local GitHub environments and OIDC roles in -[GitHub Actions Secrets and Variables](docs/github-actions-secrets.md). +`make pulumi-preview`, `make pulumi-plan`, and `make pulumi-up-plan` +automatically enable the repository policy pack. If the shared `uv` +environment inside the container is missing core Pulumi Python dependencies, +the bootstrap helper resyncs it from `uv.lock` before Pulumi starts. The policy +runtime is refreshed separately in `policy/.venv` from `policy/requirements.txt` +so Pulumi starts the policy pack consistently in Docker, CI, and local shells. +Apply reviewed changes through saved plans: run `make pulumi-plan`, review the +plan artifacts and guardrails, then run `make pulumi-up-plan` for the selected +stack. Shared backends must use an AWS KMS-backed secrets provider. 
For +test/prod CI setup, store account-local values in AWS Secrets Manager JSON +secrets, project them directly from AWS Secrets Manager, and configure OIDC +roles as described in [GitHub Actions Secrets and Variables](docs/github-actions-secrets.md). +The secure human setup sequence is in the +[AWS Secrets Manager CI cutover manual](docs/aws-secrets-manager-ci-cutover.md). ## Security diff --git a/docker-compose.yml b/docker-compose.yml index a295b0d..2a6baf5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -22,7 +22,6 @@ services: required: false # Pass GITHUB_TOKEN explicitly to the relevant Make targets only when needed. environment: - - PULUMI_ACCESS_TOKEN - PULUMI_BACKEND_URL - AWS_ACCESS_KEY_ID - AWS_SECRET_ACCESS_KEY diff --git a/docs/README.md b/docs/README.md index 98fc47f..c1b9b3f 100644 --- a/docs/README.md +++ b/docs/README.md @@ -69,7 +69,7 @@ report-security-account-attestation Render a non-secret security account attest report-production-dr-owner-evidence Render non-secret production DR owner evidence from collector evidence. start Initialize and start the Pulumi development environment. pulumi-preview Preview infrastructure changes with the policy pack enforced. -pulumi-up Apply the current infrastructure plan with the policy pack enforced. +pulumi-up Apply directly for local/admin use only; GitHub Actions must use saved plans. pulumi-refresh Sync the Pulumi stack with live cloud resources. pulumi-destroy Tear down the stack (irreversible; use with caution). sh Open a shell inside the Pulumi container. @@ -148,11 +148,18 @@ concurrency groups, bounded job timeouts, pinned actions, and a shared `make start` bootstrap path so local and GitHub-hosted validation stay aligned. -Privileged issue 18 workflows use GitHub environments for account separation: -`test` for trusted PR preview, test apply, and test drift; `prod-preview` for -production preview and drift; and protected `prod` for production apply. 
-Configure account-local variables, OIDC roles, Pulumi backend URLs, and AWS -KMS-backed Pulumi secrets providers in the [GitHub Actions Secrets guide](github-actions-secrets.md). +Privileged issue 20 workflows use fixed AWS Secrets Manager CI secrets for account +separation: `test-pr` for trusted PR preview, IAM validation, and same-repo PR +evidence, `test` for test apply, drift, operations alert triage, and main-branch +evidence, `prod-preview` for production preview and drift, and `prod` for +production apply. GitHub keeps only the protected `prod` Environment for +approval. Configure account-local variables, OIDC roles, Pulumi backend URLs, +and AWS KMS-backed Pulumi secrets providers in the +[GitHub Actions Secrets guide](github-actions-secrets.md). +Use the [AWS Secrets Manager CI cutover manual](aws-secrets-manager-ci-cutover.md) +for the human setup sequence that populates AWS Secrets Manager, configures +GitHub repository variables, verifies privileged CI, removes legacy GitHub +Environment variables, and reconciles legacy operations-alert issues. The PR-comment path accepts `/pulumi test plan`, `/pulumi test up`, `/pulumi prod plan`, and `/pulumi prod up`; production comments run the test account apply and post-apply drift gates successfully before production starts. @@ -240,7 +247,7 @@ Continuous integration runs automatically on every pull request. You can also va - Use the focused suites when you only need one slice: `make build`, `make test-pulumi`, `make test-repository-fanout`, `make test-policy`, `make test-crossguard`, `make test-quality`, `make test-repo-hygiene`, `make test-unit`, `make test-integration`, `make test-coverage`, `make test-mutation`, `make test-cli`, `make test-security`, `make test-guardrails`. - Use `make test-policy` when you are changing guardrails or adding new AWS resource types that should be covered by the policy pack. 
- `make test-mutation` intentionally uses the focused `pulumi/app` unit-test surface by default so the PR mutation check stays fast; override `MUTATION_TEST_TARGETS` or `MUTATION_TESTS_DIR` only when you explicitly need a broader, slower mutation run. -- `make pulumi-preview` and `make pulumi-up` sync the shared `uv` environment if needed, refresh `policy/.venv`, and then run Pulumi with the repository policy pack enabled. +- `make pulumi-preview`, `make pulumi-plan`, and `make pulumi-up-plan` sync the shared `uv` environment if needed, refresh `policy/.venv`, and then run Pulumi with the repository policy pack enabled. - Run `make test` to execute the faster structural, policy, quality, repo-hygiene, unit, integration, coverage, and CLI checks together after a prerequisite sanity check. - Use `make ci-pr` to mirror the non-mutation GitHub pull-request battery, including the prerequisite check, image build, security scans, preview generation, and policy suite. - Execute `make ci` to run the full local equivalent of all GitHub checks, including the prerequisite check, image build, and mutation suite. @@ -248,8 +255,8 @@ Continuous integration runs automatically on every pull request. You can also va - If Pulumi plugin downloads hit GitHub rate limits locally, pass `GITHUB_TOKEN="$(gh auth token)"` explicitly to the preview-oriented target you are running instead of exporting it globally. -- `make pulumi-preview` to review planned resources before applying. -- `make pulumi-up` followed by `pulumi stack output` to inspect applied results. +- `make pulumi-plan` to save a reviewed plan for the selected stack. +- `make pulumi-up-plan` followed by `pulumi stack output` to inspect applied results from the reviewed saved plan. - GitHub Actions mirrors `make ci-pr` through the `Pulumi Local Test Battery` workflow, while mutation remains isolated in `pulumi-mutation.yml`. 
- `Pulumi PR Guardrails` and `Security Scans` also expose their focused Make entrypoints as dedicated CI checks. @@ -266,7 +273,7 @@ Use the dedicated [testing guide](testing.md) when you need to know exactly what ## Repository Synchronization -This template feeds other VilnaCRM infrastructure projects through [`actions-template-sync`](https://github.com/AndreasAugustin/actions-template-sync). For authentication guidance, see the [Template Sync secrets](github-actions-secrets.md#template-sync-secrets). +This template feeds other VilnaCRM infrastructure projects through [`actions-template-sync`](https://github.com/AndreasAugustin/actions-template-sync). For authentication guidance, see the [GitHub Actions secrets and variables guide](github-actions-secrets.md). Remember to grant least privilege, rotate secrets regularly, and monitor workflow logs. diff --git a/docs/alert-routing-evidence.md b/docs/alert-routing-evidence.md index c33b6fd..2c99474 100644 --- a/docs/alert-routing-evidence.md +++ b/docs/alert-routing-evidence.md @@ -68,14 +68,62 @@ service-event injection. ## Human Consumption Route Operations alerts are consumed by the scheduled -`.github/workflows/operations-alert-triage.yml` workflow. The workflow assumes -the dedicated test account operations alert triage role through GitHub OIDC, -reads metadata-only messages from `bootstrap-test-operations-alerts`, creates a -GitHub issue in -`VilnaCRM-Org/bootstrap-infrastructure` with sanitized SNS/EventBridge source, -detail type, and event-time metadata, and deletes messages only after the issue -is created. It must not write raw alert payloads, stack exports, credentials, -tokens, or private incident notes to GitHub. +`.github/workflows/operations-alert-triage.yml` workflow. 
The workflow loads the +fixed `vilnacrm-org/bootstrap-infrastructure/test` AWS Secrets Manager CI secret, assumes the +dedicated test account operations alert triage role through GitHub OIDC, reads +metadata-only messages from `bootstrap-test-operations-alerts`, and writes +sanitized GitHub issue records in `VilnaCRM-Org/bootstrap-infrastructure`. + +The issue body includes an `operations-alert:fingerprint=` marker built +from stable event fields such as source, detail type, state, backup vault, +backup plan, backup rule, and resource ARN. Repeated notifications for the same +underlying route update the open canonical issue with a comment instead of +creating duplicate issues. The workflow deletes SQS messages only after the +GitHub issue create or comment operation succeeds. It must not write raw alert +payloads, stack exports, credentials, tokens, or private incident notes to +GitHub. + +Legacy operations-alert issues that predate the fingerprint marker are not +automatically absorbed by the search query. Treat the first post-merge +fingerprinted issue as the canonical record for that alert stream, then link +and close older duplicate issues only after an SRE confirms the sanitized +events share the same underlying AWS Backup state, vault, plan or rule, and +protected resource. A comment on a legacy issue is not enough for future +workflow dedupe because the workflow searches issue bodies for the marker. +If the original SQS messages were already drained and no new matching alert +arrives, use the manual **Operations Alert Canonical Backfill** workflow to +create or update the canonical fingerprinted issue from SRE-confirmed stable +fields before running legacy reconciliation. 
The backfill workflow runs behind +the same `operations-alert-reconcile` GitHub Environment, requires an HTTPS +`sre_confirmation_reference`, accepts one `stable_event_json` object containing +the confirmed EventBridge `source`, `detailType`, `state`, `resourceArn`, +optional AWS Backup stable fields, optional `detail`, and optional `resources`, +and requires this exact confirmation sentence: + +```text +I confirm these stable fields represent the canonical operations alert stream +``` + +After SRE confirmation, use the manual **Operations Alert Legacy Reconcile** +workflow to close legacy duplicates. The workflow requires a canonical issue +whose body already contains `operations-alert:fingerprint=`, accepts only +unmarked open `Operations alerts queued:` issues as legacy duplicates, requires +an HTTPS SRE confirmation reference, and uses GitHub duplicate closure +semantics. It runs behind the +`operations-alert-reconcile` GitHub Environment so repository administrators can +require SRE or reviewer approval before any duplicate closure. It does not +request AWS or GitHub OIDC credentials; it only writes issue comments and +duplicate closures. + +The workflow confirmation input must exactly match this sentence, and the +`sre_confirmation_reference` input must point to the sanitized SRE confirmation +comment or ticket. Do not put raw alert payloads, credentials, stack exports, +tokens, or private incident notes in that referenced record. + +```text +I confirm these legacy issues match the canonical operations alert stream +``` + The shared Pulumi automation role carries an explicit deny for alert-queue `sqs:ReceiveMessage` and `sqs:DeleteMessage`; only the dedicated triage role may drain alert messages. @@ -110,7 +158,11 @@ The `Well-Architected Evidence` workflow now runs on pull requests, pushes to Scheduled runs remain advisory even if evidence enforcement is enabled, upload the metadata-only evidence bundle, and retain the artifact for 90 days. 
The separate operations alert triage workflow creates GitHub issues from queued -alert metadata every 30 minutes. +alert metadata every 30 minutes. Mixed SQS batches are split by stable alert +stream before GitHub issue search/create/comment operations, so unrelated +streams do not collapse into one duplicate marker. Legacy issues without the +`operations-alert:fingerprint=` marker still require SRE confirmation and an +HTTPS `sre_confirmation_reference` before backfill or closure. After a scheduled or manual collector run, SRE can render a dated observation record from `.artifacts/well-architected/evidence.json`: diff --git a/docs/aws-secrets-manager-ci-cutover.md b/docs/aws-secrets-manager-ci-cutover.md new file mode 100644 index 0000000..fd1a98d --- /dev/null +++ b/docs/aws-secrets-manager-ci-cutover.md @@ -0,0 +1,202 @@ +# AWS Secrets Manager CI Cutover Manual + +This project does not require Pulumi Cloud or Pulumi ESC for privileged CI. +GitHub Actions uses GitHub OIDC to assume AWS roles, reads account-local CI +configuration from AWS Secrets Manager, and then runs Pulumi CLI with the S3 +backend and AWS KMS secrets provider. + +## Architecture + +Runtime flow: + +1. GitHub Actions requests an OIDC token for the fixed workflow job. +2. `.github/actions/load-aws-ci-env` assumes the matching + `GitHubCiConfigRead-*` role in AWS. +3. The action reads one AWS Secrets Manager JSON secret. +4. The action validates required keys without printing values. +5. The workflow assumes the preview, apply, drift, or operations role from the + loaded JSON. +6. Pulumi uses `PULUMI_BACKEND_URL=s3://...` and + `PULUMI_SECRETS_PROVIDER=awskms://...`. + +Pulumi Cloud, Pulumi ESC, and `PULUMI_ACCESS_TOKEN` are not part of this setup. + +## Required GitHub Variables + +Set these repository variables. 
They are metadata, not secrets: + +| Variable | Purpose | +| --- | --- | +| `AWS_TEST_REGION` | Region containing the test account CI config secrets | +| `AWS_TEST_PR_CI_CONFIG_ROLE_ARN` | Reads `/bootstrap-infrastructure/ci/test-pr` | +| `AWS_TEST_CI_CONFIG_ROLE_ARN` | Reads `/bootstrap-infrastructure/ci/test` | +| `AWS_PROD_REGION` | Region containing the prod account CI config secrets | +| `AWS_PROD_PREVIEW_CI_CONFIG_ROLE_ARN` | Reads `/bootstrap-infrastructure/ci/prod-preview` | +| `AWS_PROD_CI_CONFIG_ROLE_ARN` | Reads `/bootstrap-infrastructure/ci/prod` | + +The Pulumi stack output `githubCiConfigReadRoleArns` contains the role ARNs. + +## Required Secrets Manager Payloads + +Create one JSON secret value per fixed CI suffix: + +| Suffix | Secret ID | +| --- | --- | +| `test-pr` | `/bootstrap-infrastructure/ci/test-pr` | +| `test` | `/bootstrap-infrastructure/ci/test` | +| `prod-preview` | `/bootstrap-infrastructure/ci/prod-preview` | +| `prod` | `/bootstrap-infrastructure/ci/prod` | + +Common keys: + +- `AWS_ACCOUNT_ID` +- `AWS_REGION` +- `PULUMI_BACKEND_URL` +- `PULUMI_SECRETS_PROVIDER` + +Role keys by workflow need: + +- `AWS_PREVIEW_ROLE_ARN` +- `AWS_APPLY_ROLE_ARN` +- `AWS_DRIFT_ROLE_ARN` +- `AWS_OPERATIONS_ALERT_TRIAGE_ROLE_ARN` + +Other supported keys: + +- `PULUMI_PREVIEW_STACKS` +- `PULUMI_DRIFT_STACKS` +- `OPERATIONS_ALERT_QUEUE_NAME` +- `OPERATIONS_TOPIC_ARN` +- `OPERATIONS_CLOUDTRAIL_NAME` + +Use `put-secret-value` from a local private JSON file. Do not paste secret +payloads into chat, GitHub issues, workflow logs, Pulumi config, or docs. Do not +use `get-secret-value` for verification because it prints secret material. 
+ +## Fix Local AWS CLI For Test + +First remove stale environment credentials from the shell that will run the +cutover: + +```bash +unset AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN AWS_PROFILE +``` + +If the test account uses AWS SSO: + +```bash +aws sso login --profile <profile> +AWS_PROFILE=<profile> aws sts get-caller-identity --output json +``` + +If the test account uses another credential broker, run the approved login +command for that broker, then verify: + +```bash +AWS_PROFILE=<profile> aws sts get-caller-identity --output json +``` + +The command must return the expected 12-digit test account ID. Share only the +profile name and account ID if help is needed; never share access keys or +session tokens. + +## Bootstrap Or Update AWS Resources + +Apply the Pulumi `test` and `prod` stacks with credentials for the owning AWS +account. The stacks create: + +- Secrets Manager secret containers +- one `GitHubCiConfigRead-*` role per CI suffix +- least-privilege policies scoped to the matching secret only +- GitHub OIDC trust limited by repo subject and workflow ref + +After each apply, capture: + +```bash +pulumi -C pulumi stack output ciConfigurationSecretIds --stack test +pulumi -C pulumi stack output githubCiConfigReadRoleArns --stack test +pulumi -C pulumi stack output ciConfigurationSecretIds --stack prod +pulumi -C pulumi stack output githubCiConfigReadRoleArns --stack prod +``` + +## Populate Secret Values + +For each suffix, prepare a private JSON file and write it with: + +```bash +AWS_PROFILE=<profile> aws secretsmanager put-secret-value \ + --secret-id /bootstrap-infrastructure/ci/<suffix> \ + --secret-string file://<suffix>.json +``` + +Verify metadata only: + +```bash +AWS_PROFILE=<profile> aws secretsmanager describe-secret \ + --secret-id /bootstrap-infrastructure/ci/<suffix> +``` + +Do not run `aws secretsmanager get-secret-value` during verification. +Do not use `get-secret-value` for verification.
+ +## Configure GitHub + +Set repository variables from the stack outputs: + +```bash +gh variable set AWS_TEST_REGION --body '<aws-test-region>' +gh variable set AWS_TEST_PR_CI_CONFIG_ROLE_ARN --body '<test-pr-ci-config-role-arn>' +gh variable set AWS_TEST_CI_CONFIG_ROLE_ARN --body '<test-ci-config-role-arn>' +gh variable set AWS_PROD_REGION --body '<aws-prod-region>' +gh variable set AWS_PROD_PREVIEW_CI_CONFIG_ROLE_ARN --body '<prod-preview-ci-config-role-arn>' +gh variable set AWS_PROD_CI_CONFIG_ROLE_ARN --body '<prod-ci-config-role-arn>' +``` + +The privileged workflows must keep `id-token: write`. They must not use +`PULUMI_ACCESS_TOKEN`, Pulumi Cloud, Pulumi ESC, or GitHub Environment variables +for account-local CI configuration. + +## Validate + +Re-run these checks after the variables and AWS secrets are present: + +- `Pulumi PR Guardrails` +- `Pulumi Test Deploy` +- `Pulumi Production` preview and protected apply +- `Nightly Guardrails` +- `Operations Alert Triage` +- `Well-Architected Evidence` + +Expected loader summary: + +- source of truth is AWS Secrets Manager +- Pulumi Cloud/ESC is not used +- secret values are not printed + +## Legacy GitHub Environment Cleanup + +After AWS-only privileged CI is green, run +`GitHub Environment Legacy Variable Cleanup` with `dry_run=true`. Review the +planned deletions. The workflow requires `GH_ENVIRONMENT_ADMIN_TOKEN` because +GitHub's default token cannot delete repository environment variables. Then +rerun with `dry_run=false` and this confirmation: + +```text +I confirm AWS Secrets Manager-backed privileged CI is green and legacy GitHub Environment variables can be removed +``` + +Run `Operations Alert Legacy Reconcile` separately if legacy alert issues need +manual reconciliation, and provide the required `sre_confirmation_reference`.
+If no new queued alert exists to create a canonical fingerprinted issue, run +`Operations Alert Canonical Backfill` first with one SRE-confirmed +`stable_event_json` object and this confirmation: + +```text +I confirm these stable fields represent the canonical operations alert stream +``` + +Use this reconciliation confirmation when closing duplicates: + +```text +I confirm these legacy issues match the canonical operations alert stream +``` diff --git a/docs/ci-architecture.md b/docs/ci-architecture.md index e96fe59..9b401e9 100644 --- a/docs/ci-architecture.md +++ b/docs/ci-architecture.md @@ -35,25 +35,35 @@ Docker-backed pull request checks use the same Docker workspace and the same ## Multi-Account Environments -Issue 18 uses GitHub environments as the account and approval boundary: +Issue 20 moves privileged account configuration from GitHub Environment +variables into fixed AWS Secrets Manager JSON secrets: -| GitHub environment | AWS account intent | Workflow use | +| CI suffix | AWS Secrets Manager secret ID | Workflow use | | --- | --- | --- | -| `test` | Test account | Trusted PR previews, main-branch test applies, test drift | -| `prod-preview` | Production account with preview-only access | Production preview and production drift | -| `prod` | Production account with apply access | Production apply after approval | - -Each environment owns its own `AWS_ACCOUNT_ID`, OIDC role ARNs, -`PULUMI_BACKEND_URL`, `PULUMI_SECRETS_PROVIDER`, region, and stack list. Shared -Pulumi backends must use AWS KMS secrets providers via `PULUMI_SECRETS_PROVIDER` -and stack initialization or migration must pass `--secrets-provider -"$PULUMI_SECRETS_PROVIDER"`. 
+| `test-pr` | `/bootstrap-infrastructure/ci/test-pr` | Trusted same-repo PR previews, IAM validation, and PR evidence collection | +| `test` | `/bootstrap-infrastructure/ci/test` | Main-branch test applies, test drift, operations alert triage, Well-Architected evidence | +| `prod-preview` | `/bootstrap-infrastructure/ci/prod-preview` | Production preview, IAM validation, and production drift | +| `prod` | `/bootstrap-infrastructure/ci/prod` | Production apply after protected GitHub `prod` approval | + +The workflows pass only fixed suffixes such as `test`, `prod-preview`, or +`prod` to `.github/actions/load-aws-ci-env`. The loader assumes the matching +`GitHubCiConfigRead-*` role through GitHub OIDC, reads the JSON secret, validates +`AWS_ACCOUNT_ID`, OIDC role ARNs, `PULUMI_BACKEND_URL`, +`PULUMI_SECRETS_PROVIDER`, region, and stack lists, then exports only validated +keys to the job environment. The Pulumi stacks manage the Secrets Manager secret +containers and the `GitHubCiConfigRead-*` roles, but humans populate the JSON +values in AWS Secrets Manager. Shared Pulumi backends must use AWS KMS secrets +providers via `PULUMI_SECRETS_PROVIDER`, and stack initialization or migration +must pass +`--secrets-provider "$PULUMI_SECRETS_PROVIDER"`. GitHub keeps only the protected +`prod` Environment for approval; `test` and `prod-preview` are not GitHub +deployment environments. Production apply is intentionally split from production preview. The preview job -first verifies that the requested commit already has a successful `Pulumi Test -Deploy` run on `main`, then records sanitized evidence for the reviewed commit. -The apply job runs only in the protected `prod` environment after required -reviewers approve it and the commit SHA is still the reviewed SHA. +loads `prod-preview`, verifies that the requested commit already has a +successful `Pulumi Test Deploy` run on `main`, and records sanitized evidence +for the reviewed commit. 
The apply job loads `prod` only after required GitHub +`prod` reviewers approve it and the commit SHA is still the reviewed SHA. PR-comment production promotion follows the same account boundaries, but the trusted runner performs the test sequence inside the command workflow before @@ -100,10 +110,11 @@ ask for broader access only where automation actually needs to write tags, releases, or pull requests. Privileged infrastructure jobs add `id-token: write` only when they need GitHub -OIDC. Fork pull-request jobs do not bind a GitHub environment and do not request -OIDC token permission. Privileged jobs should pass `allowed-account-ids` with -the active environment's `AWS_ACCOUNT_ID` and use purpose-specific roles: -preview/drift roles for non-mutating checks and apply roles for deployments. +OIDC. Fork pull-request jobs do not load AWS credentials, do not bind a GitHub +environment, and do not request OIDC token permission. Privileged jobs pass +`allowed-account-ids` with the `AWS_ACCOUNT_ID` loaded from AWS Secrets Manager +and use purpose-specific roles: preview/drift roles for non-mutating checks and +apply roles for deployments. ## Local Parity @@ -137,7 +148,7 @@ Use this checklist: 4. add `concurrency` 5. set `timeout-minutes` 6. call `make start` if the job uses the Docker workspace -7. bind privileged jobs to the correct GitHub environment +7. load AWS Secrets Manager-backed configuration through the correct fixed AWS Secrets Manager CI secret 8. use OIDC with explicit account allow-listing for AWS jobs 9. extend the structural tests and docs in the same PR diff --git a/docs/ci-guardrails.md b/docs/ci-guardrails.md index 52c0839..c21d722 100644 --- a/docs/ci-guardrails.md +++ b/docs/ci-guardrails.md @@ -79,8 +79,8 @@ developers use: 1. a credential-free mode-selection job checks whether the pull request came from a fork -2. trusted same-repo runs use `make start` and - `make publish-pulumi-preview-summary` in the `test` GitHub environment +2. 
trusted same-repo runs load the fixed `test-pr` AWS Secrets Manager CI secret, then use + `make start` and `make publish-pulumi-preview-summary` 3. fork pull requests use `make start` and `make test-preview-unprivileged` without a GitHub environment, OIDC permission, AWS credentials, or environment variables @@ -92,11 +92,11 @@ Preview artifacts are written under `.artifacts/pulumi-preview/` and uploaded to GitHub Actions. The preview summary is appended to `GITHUB_STEP_SUMMARY` so reviewers can inspect the plan without digging through raw logs first. -For issue 18, privileged previews are environment-scoped: +For issue 20, privileged previews are AWS Secrets Manager-scoped: -- trusted same-repo PRs use the `test` GitHub environment and preview the +- trusted same-repo PRs use the fixed `test-pr` AWS Secrets Manager CI secret and preview the configured test stack -- production release previews use the `prod-preview` GitHub environment and +- production release previews use the fixed `prod-preview` AWS Secrets Manager CI secret and preview the production stack without apply permissions - fork PRs stay on the unprivileged artifact path and never receive AWS credentials or `id-token: write` permission @@ -109,11 +109,10 @@ artifact, so apply jobs use a plan whose preview has already passed guardrails. selected stack, backend URL, commit SHA, plan hash, and preview hash. `make pulumi-up-plan` refuses to apply when the manifest is missing, stale, from a different commit or backend, or when the saved plan hash no longer matches. -Production applies remain saved-plan-only. The test deployment workflow may -fall back to a direct `make pulumi-up` only when `pulumi up --plan` fails with -Pulumi's known KMS-backed saved-plan decryption error after the same-run -preview, destructive-diff, and IAM validation gates have passed under the -test-state concurrency lock. +Test and production applies remain saved-plan-only. 
If a saved plan cannot be +applied, the workflow fails instead of switching to a direct apply path; rerun +preview and plan generation after fixing the underlying backend, KMS, or plan +artifact issue. Stack selection follows this order: @@ -196,30 +195,67 @@ semantic validation for the rendered policy documents. The guardrail workflows are OIDC-first. They do not use long-lived `AWS_ACCESS_KEY_ID` or `AWS_SECRET_ACCESS_KEY` repository secrets. -Privileged jobs read account-specific values from the active GitHub -environment, not repository-wide variables. The required environment variables -are: +Privileged jobs read account-specific values directly from fixed AWS Secrets +Manager JSON secrets, not from GitHub Environment variables, repository-wide +variables, Pulumi Cloud, or Pulumi ESC. AWS Secrets Manager is the vault and +source of truth for account-local values. The fixed CI suffixes are: + +| CI suffix | Use | +| --- | --- | +| `test-pr` | Trusted same-repo PR preview, IAM validation, and PR evidence collection | +| `test` | Test apply, test drift, operations alert triage, and Well-Architected evidence | +| `prod-preview` | Production preview, IAM validation, and drift | +| `prod` | Production apply after protected GitHub `prod` approval | + +Workflow call sites pass fixed suffixes like `test`, `test-pr`, `prod-preview`, +and `prod`; PR input, issue comments, and repository-dispatch payloads cannot +supply arbitrary secret names. 
Each suffix maps to one AWS Secrets Manager JSON +secret in the owning AWS account: + +| AWS Secrets Manager CI secret suffix | AWS Secrets Manager secret ID | +| --- | --- | +| `test-pr` | `/bootstrap-infrastructure/ci/test-pr` | +| `test` | `/bootstrap-infrastructure/ci/test` | +| `prod-preview` | `/bootstrap-infrastructure/ci/prod-preview` | +| `prod` | `/bootstrap-infrastructure/ci/prod` | + +The Pulumi `test` and `prod` stacks manage these AWS Secrets Manager secret +containers and the account-local `GitHubCiConfigRead-*` roles. Pulumi does not +create a `SecretVersion` or own the JSON values. Maintainers populate and rotate +the JSON values in AWS Secrets Manager after the stack creates the containers. +The workflow loader assumes the matching `GitHubCiConfigRead-*` role through +GitHub OIDC, calls `aws secretsmanager get-secret-value`, parses JSON, validates +the required keys, and exports only validated environment variables. Do not store +AWS account IDs, role ARNs, backend URLs, stack lists, or secrets-provider URIs +in Pulumi config, GitHub Environment variables, workflow logs, or docs. 
+ +The required AWS CI config `environmentVariables` are: | Variable | Purpose | | --- | --- | | `AWS_ACCOUNT_ID` | Expected AWS account for `allowed-account-ids` and audit evidence | +| `AWS_REGION` | AWS region used by `configure-aws-credentials` and Pulumi | | `AWS_PREVIEW_ROLE_ARN` | OIDC role assumed by preview and IAM validation jobs | +| `AWS_APPLY_ROLE_ARN` | OIDC role used by test or production apply jobs | | `AWS_DRIFT_ROLE_ARN` | OIDC role assumed by drift jobs | | `PULUMI_BACKEND_URL` | Account-specific shared Pulumi backend | | `PULUMI_SECRETS_PROVIDER` | AWS KMS Pulumi secrets provider URI used by stacks | +| `PULUMI_PREVIEW_STACKS` | Comma-separated stack list for preview and apply | +| `PULUMI_DRIFT_STACKS` | Comma-separated stack list for drift checks | -Optional or job-specific environment variables: +Job-specific AWS CI variables: | Variable | Purpose | | --- | --- | -| `AWS_REGION` | AWS region used by `configure-aws-credentials`; defaults to `eu-central-1` | -| `PULUMI_PR_BACKEND_URL` | Optional PR-only backend, useful while a legacy shared test stack is being migrated | -| `PULUMI_PR_PREVIEW_STACKS` | Optional PR-only stack list; used by trusted PR and test deploy fallbacks | -| `PULUMI_PREVIEW_STACKS` | Optional comma-separated stack list for preview | -| `PULUMI_DRIFT_STACKS` | Optional comma-separated stack list for nightly drift checks | -| `AWS_APPLY_ROLE_ARN` | OIDC role used by test or production apply jobs | +| `AWS_OPERATIONS_ALERT_TRIAGE_ROLE_ARN` | Dedicated OIDC role for operations alert triage | +| `OPERATIONS_ALERT_QUEUE_NAME` | SQS queue drained by operations alert triage | | `OPERATIONS_TOPIC_ARN` | Standard metadata input for evidence collection when the environment reuses an existing operations SNS topic | | `OPERATIONS_CLOUDTRAIL_NAME` | Standard metadata input for evidence collection when the environment reuses an existing operations CloudTrail | + +Optional non-secret repository variables: + +| Variable | Purpose | +| --- 
| --- | | `RESTORE_DRILL_EVIDENCE` | Standard metadata input pointing to the latest workload-scoped restore drill evidence record | | `DEPENDABOT_EXCEPTION_EVIDENCE` | Optional non-secret exception evidence covering exact open default-branch Dependabot alert numbers when remediation cannot land immediately | | `ALERT_ROUTE_OBSERVATION_EVIDENCE` | Optional non-secret SRE-approved downstream alert-route observation evidence matching the live operations route | @@ -228,20 +264,11 @@ Optional or job-specific environment variables: | `QUESTION_MATRIX_EVIDENCE` | Standard metadata input pointing to the structured 57-question review evidence record | | `EXTERNAL_CONTROL_EVIDENCE` | Standard metadata input pointing to the structured external-control owner and freshness evidence record | -Optional environment secrets: - -| Secret | Purpose | -| --- | --- | -| `PULUMI_ACCESS_TOKEN` | Required only when the backend is the Pulumi Service | - Shared backends should use an AWS KMS-backed Pulumi secrets provider rather than a passphrase-managed stack secret flow. -`Pulumi Test Deploy` uses the generic backend, stack, apply-role, and drift-role -variables when they exist. In the `test` environment it can fall back to -`PULUMI_PR_BACKEND_URL`, `PULUMI_PR_PREVIEW_STACKS`, and `AWS_PREVIEW_ROLE_ARN` -so an existing single bootstrap automation role can apply its own narrowed -policy before creating new operations and cost-control resources. +`Pulumi Test Deploy` uses the `test` AWS Secrets Manager CI backend, stack list, apply role, and +drift role. Missing AWS CI values fail fast before AWS credentials are requested. Fork pull requests always run the unprivileged artifact path and the destructive diff gate. Same-repo pull requests fail fast when required @@ -251,7 +278,7 @@ paths remain same-repo only because they require OIDC-issued AWS credentials. 
Privileged jobs should emit sanitized evidence in the job summary or logs: -- GitHub environment name +- AWS Secrets Manager CI secret name - expected AWS account ID and selected AWS region - role purpose, such as preview, drift, or apply - Pulumi backend type, stack names, and guardrail mode @@ -418,9 +445,10 @@ The accepted Dependabot exception shape is also non-secret: ### Example IAM trust policy -Replace the account ID, organization, repository name, and GitHub environment +Replace the account ID, organization, repository name, workflow file, and branch with your own values. The account ID placeholder must be the target 12-digit AWS account ID -using digits only: +using digits only. Non-approval preview, drift, evidence, and test apply roles +trust fixed repository refs or pull requests plus fixed workflow refs: ```json { @@ -435,7 +463,20 @@ using digits only: "Condition": { "StringEquals": { "token.actions.githubusercontent.com:aud": "sts.amazonaws.com", - "token.actions.githubusercontent.com:sub": "repo:VilnaCRM-Org/bootstrap-infrastructure:environment:" + "token.actions.githubusercontent.com:sub": [ + "repo:VilnaCRM-Org/bootstrap-infrastructure:ref:refs/heads/main", + "repo:VilnaCRM-Org/bootstrap-infrastructure:pull_request" + ] + }, + "StringLike": { + "token.actions.githubusercontent.com:job_workflow_ref": [ + "VilnaCRM-Org/bootstrap-infrastructure/.github/workflows/pulumi-pr-guardrails.yml@refs/*", + "VilnaCRM-Org/bootstrap-infrastructure/.github/workflows/pulumi-test-deploy.yml@refs/heads/main", + "VilnaCRM-Org/bootstrap-infrastructure/.github/workflows/nightly-guardrails.yml@refs/heads/main", + "VilnaCRM-Org/bootstrap-infrastructure/.github/workflows/pulumi-prod.yml@refs/heads/main", + "VilnaCRM-Org/bootstrap-infrastructure/.github/workflows/pulumi-pr-command-runner.yml@refs/heads/main", + "VilnaCRM-Org/bootstrap-infrastructure/.github/workflows/well-architected-evidence.yml@refs/*" + ] } } } @@ -443,16 +484,27 @@ using digits only: } ``` -Use `environment:test` for test
preview/apply roles, `environment:prod-preview` -for production preview and drift roles, and `environment:prod` only for the -production apply role. +Production apply is the only privileged path that should include a GitHub +Environment subject: + +```text +repo:VilnaCRM-Org/bootstrap-infrastructure:environment:prod +``` + +Production apply does not trust branch or pull request subjects. Production +preview, IAM validation, and drift jobs run from the protected default branch +through fixed `prod-preview` AWS CI configuration, while production apply requires +only the protected GitHub `prod` Environment subject. + +The operations alert triage role should trust only +`operations-alert-triage.yml@refs/heads/main`. ## Production protection Production release automation has two boundaries: -- `prod-preview` can create review evidence but cannot apply changes -- `prod` can apply only after GitHub environment approval, branch protection, +- `prod-preview` AWS CI config can create review evidence but cannot apply changes +- protected GitHub `prod` approval can apply only after branch protection, and commit SHA verification The production workflow also checks that the requested commit SHA already has a @@ -481,15 +533,33 @@ fresh GitHub runner would be misleading. The workflows are committed in this repository, but maintainers still need to: -1. create the GitHub OIDC IAM role in AWS -2. create `test`, `prod-preview`, and `prod` GitHub environments -3. set the environment variables and optional secrets listed above -4. enable required reviewers and branch restrictions on `prod` -5. mark the required PR checks in GitHub branch protection -6. decide whether production repositories want stricter stack lists or narrower +1. create or adopt the GitHub OIDC provider in AWS +2. apply the Pulumi `test` and `prod` stacks so AWS creates the four Secrets + Manager containers and `GitHubCiConfigRead-*` roles +3. 
populate the four AWS Secrets Manager JSON values in the owning AWS accounts +4. set the repository variables from `githubCiConfigReadRoleArns` and the AWS + regions +5. apply the Pulumi test and production stacks so the updated IAM trust policies + converge in AWS +6. run **GitHub Environment Legacy Variable Cleanup** first as a dry run, then + with the documented confirmation sentence after AWS Secrets Manager-backed privileged CI is + green +7. delete the temporary `GH_ENVIRONMENT_ADMIN_TOKEN` repository secret after + cleanup succeeds +8. create the protected `prod` GitHub Environment for production approval +9. enable required reviewers and branch restrictions on `prod` +10. create the protected `operations-alert-reconcile` GitHub Environment with + required SRE or reviewer approval and no account configuration before legacy + operations-alert backfill or issue closure; the manual backfill and closure + workflows also require an HTTPS `sre_confirmation_reference` to the sanitized + SRE confirmation record +11. mark the required PR checks in GitHub branch protection +12. confirm no stale AWS trust subjects or privileged GitHub Environment account + variables remain outside the protected `prod` approval boundary +13. decide whether production repositories want stricter stack lists or narrower IAM role scopes than the template defaults -Repository administrators can make steps 4 and 5 reproducible with: +Repository administrators can make the GitHub protection steps reproducible with: ```bash gh api graphql \ @@ -510,15 +580,16 @@ at the admin-rights preflight until a repository administrator runs it. Set `GITHUB_REPOSITORY_CONTROLS_MODE=--dry-run`, or omit the variable, to inspect the ruleset and protected environment payloads. Dry runs resolve the -reviewer login to the numeric GitHub user ID used by the environment API.
Set +reviewer login to the numeric GitHub user ID used by the environment API and +print payloads for both `prod` and `operations-alert-reconcile`. Set `GITHUB_REPOSITORY_CONTROLS_MODE=--verify-only` after applying settings -manually or through another tool to re-read the active `main` ruleset and -`prod` environment without writing. With +manually or through another tool to re-read the active `main` ruleset plus the +`prod` and `operations-alert-reconcile` environments without writing. With `GITHUB_REPOSITORY_CONTROLS_MODE=--apply`, the helper writes the desired controls and then runs the same verification. Verification exits non-zero unless the required checks, pull-request review/thread-resolution rules, protected-branch deployment policy, self-review prevention, and configured -production reviewer are visible in GitHub metadata. +reviewer are visible in GitHub metadata for both protected environments. ## Current limitations diff --git a/docs/github-actions-secrets.md b/docs/github-actions-secrets.md index e85484d..6a70d70 100644 --- a/docs/github-actions-secrets.md +++ b/docs/github-actions-secrets.md @@ -1,139 +1,98 @@ # GitHub Actions Secrets and Variables -The hardened CI/CD layer in this repository is OIDC-first. Preview, IAM -validation, PR-comment plan/apply commands, and nightly drift detection are -designed to use short-lived AWS credentials issued through GitHub Actions OIDC. -Do not add long-lived static AWS access keys for these workflows. - -## GitHub environments - -Privileged infrastructure workflows use GitHub environments as the account -boundary. This environment-scoped configuration keeps test and production -account values out of repository-wide variables. 
Configure these environments -under **Settings -> Environments**: - -| Environment | Purpose | Protection | -| --- | --- | --- | -| `test` | Trusted PR previews, PR-comment test commands, merge-to-main test applies, and test drift checks | No production approval; keep branch scope limited to protected branches for apply jobs | -| `prod-preview` | PR-comment production plans, production previews, and production drift checks with read-only or preview-only AWS access | No apply permissions | -| `prod` | Production apply only | Require reviewers and restrict deployment branches | - -Fork pull requests must stay unprivileged. Same-repo privileged jobs should fail -fast when required environment variables are missing. - -## Environment variables - -Add account-specific values under each GitHub environment's **Variables** tab. -Do not store AWS account configuration as repository-wide variables when it -differs between test and production. - -| Variable | Purpose | Notes | -| --- | --- | --- | -| `AWS_ACCOUNT_ID` | Expected 12-digit AWS account ID for the environment | Used with OIDC account allow-listing and evidence | -| `AWS_REGION` | Region used by `configure-aws-credentials` and Pulumi | Optional only when the workflow has a safe default | -| `AWS_PREVIEW_ROLE_ARN` | OIDC role used by preview and IAM validation jobs | Required for `test` and `prod-preview` | -| `AWS_APPLY_ROLE_ARN` | OIDC role used by apply jobs | Required only for `test` and `prod` | -| `AWS_DRIFT_ROLE_ARN` | OIDC role used by drift jobs | Required for `prod-preview`; `test` can fall back to `AWS_PREVIEW_ROLE_ARN` | -| `AWS_OPERATIONS_ALERT_TRIAGE_ROLE_ARN` | Dedicated OIDC role used only by operations alert issue triage | Required for `test` when alert triage is enabled | -| `PULUMI_BACKEND_URL` | Account-specific shared Pulumi backend | Required for privileged jobs | -| `PULUMI_SECRETS_PROVIDER` | AWS KMS Pulumi secrets provider URI | Required; use an `awskms://...` URI | -| 
`PULUMI_PREVIEW_STACKS` | Comma-separated stack list for preview jobs | Use `test` in `test`; use `prod` in `prod-preview` | -| `PULUMI_DRIFT_STACKS` | Comma-separated stack list for drift jobs | Use explicit account-local stacks | - -Optional PR-only overrides for the `test` environment: - -| Variable | Purpose | -| --- | --- | -| `PULUMI_PR_BACKEND_URL` | Backend used by trusted PR previews and test deploys when `PULUMI_BACKEND_URL` is not populated | -| `PULUMI_PR_PREVIEW_STACKS` | Stack list used by trusted PR previews and test deploys when shared preview/drift stack lists are not populated | - -`Pulumi Test Deploy` can also fall back from `AWS_APPLY_ROLE_ARN` and -`AWS_DRIFT_ROLE_ARN` to `AWS_PREVIEW_ROLE_ARN` in `test` while a single -environment-scoped bootstrap role is being expanded by the stack itself. - -Use separate AWS roles per account and purpose. Preview roles should be unable -to mutate production resources. Apply roles should be scoped to the exact -resources Pulumi manages in that account. - -## PR comment commands - -Repository owners, members, and collaborators can request Pulumi operations from -same-repository pull requests: - -```text -/pulumi test plan -/pulumi test up -/pulumi prod plan -/pulumi prod up -``` - -`/pulumi plan` and `/pulumi up` are compatibility aliases for the `test` -environment. Fork pull requests are rejected before any AWS credentials are -requested. - -The comment intake workflow dispatches a trusted runner with the PR number, -exact head SHA, target environment, and command. The runner revalidates that the -PR head is still the queued SHA before checkout. Production commands always run -the test account first: they save and validate a test plan, apply it to `test`, -run post-apply drift detection, and only then continue to `prod-preview` or the -protected `prod` environment for the same SHA. - -## Optional environment secrets - -Add these under the GitHub environment's **Secrets** tab only when needed. 
+Privileged Pulumi workflows use GitHub OIDC and AWS Secrets Manager. Pulumi +Cloud and Pulumi ESC are not used for CI configuration. -| Secret | Purpose | Notes | -| --- | --- | --- | -| `PULUMI_ACCESS_TOKEN` | Authenticate against the Pulumi Service backend | Only required when the backend is Pulumi Cloud | +Release workflows may use `REPO_GITHUB_TOKEN` when present and fall back to `GITHUB_TOKEN` +for repository-scoped release automation. -Shared Pulumi backends should use an AWS KMS-backed secrets provider rather -than a passphrase-managed stack secret flow. +## Source Of Truth -## OIDC role setup +AWS Secrets Manager is the account-configuration boundary and stores +account-local CI values. GitHub stores only non-secret metadata needed to find +and read those values: -1. Create an IAM OIDC identity provider for `https://token.actions.githubusercontent.com` in each AWS account if it does not already exist. -2. Create separate preview, apply, drift, and operations alert triage roles where the environment needs them. -3. Scope trust policies to this repository, the `sts.amazonaws.com` audience, and the relevant GitHub environment subject. -4. Store the role ARNs as `AWS_PREVIEW_ROLE_ARN`, `AWS_APPLY_ROLE_ARN`, `AWS_DRIFT_ROLE_ARN`, or `AWS_OPERATIONS_ALERT_TRIAGE_ROLE_ARN` in the matching GitHub environment. -5. Configure workflows to use `allowed-account-ids` with `AWS_ACCOUNT_ID`. +- `AWS_TEST_REGION` +- `AWS_TEST_PR_CI_CONFIG_ROLE_ARN` +- `AWS_TEST_CI_CONFIG_ROLE_ARN` +- `AWS_PROD_REGION` +- `AWS_PROD_PREVIEW_CI_CONFIG_ROLE_ARN` +- `AWS_PROD_CI_CONFIG_ROLE_ARN` -See the dedicated [CI guardrails guide](ci-guardrails.md) for an example trust policy and the documented `sub` claim formats. +The role ARNs are not secret. Each role is trusted by GitHub OIDC and scoped to +one fixed CI secret suffix. 
-For environment-bound jobs, the trusted subject has this shape: +## Fixed Secret IDs -```text -repo:VilnaCRM-Org/bootstrap-infrastructure:environment: -``` - -Use `environment:test`, `environment:prod-preview`, or `environment:prod` as -appropriate. Avoid broad branch-only trust for production apply roles. -The operations alert triage role should also bind -`token.actions.githubusercontent.com:job_workflow_ref` to -`VilnaCRM-Org/bootstrap-infrastructure/.github/workflows/operations-alert-triage.yml@refs/heads/main`. - -## Release Automation Secrets - -| Secret | Purpose | Notes | -| --- | --- | --- | -| `REPO_GITHUB_TOKEN` | Publish changelog-based releases | Optional; if unset, workflows fall back to `GITHUB_TOKEN` with `contents:write`. | - -## Template Sync Secrets - -Choose one authentication strategy for the template sync workflows: - -| Secret | Purpose | Notes | -| --- | --- | --- | -| `PERSONAL_ACCESS_TOKEN` | Authenticate template sync (PAT workflow) | Required by `.github/workflows/template-sync-pat.yml`. Needs repo write access. | -| `VILNACRM_APP_ID` | GitHub App ID for template sync | Required by `.github/workflows/template-sync-app.yml`. | -| `VILNACRM_APP_PRIVATE_KEY` | GitHub App private key for template sync | Required by `.github/workflows/template-sync-app.yml`. Store the PEM contents. | - -## Setting Secrets and Variables - -1. Navigate to **Settings → Secrets and variables → Actions** in your GitHub repository. -2. Create `test`, `prod-preview`, and `prod` under **Environments**. -3. Add the environment variables listed above to each environment with account-local values. -4. Add `PULUMI_ACCESS_TOKEN` as an environment secret only when the selected backend requires it. -5. Keep release and template-sync credentials as repository or organization secrets because they are not account-specific deploy credentials. -6. Require reviewers and deployment branch restrictions on `prod` before enabling production apply. 
- -Rotate credentials regularly and audit workflow runs for unexpected usage. +| CI suffix | AWS Secrets Manager secret ID | +| --- | --- | +| `test-pr` | `/bootstrap-infrastructure/ci/test-pr` | +| `test` | `/bootstrap-infrastructure/ci/test` | +| `prod-preview` | `/bootstrap-infrastructure/ci/prod-preview` | +| `prod` | `/bootstrap-infrastructure/ci/prod` | + +The Pulumi `test` stack manages `test-pr` and `test`. The Pulumi `prod` stack +manages `prod-preview` and `prod`. Stack outputs include +`ciConfigurationSecretIds`, `ciConfigurationSecretArns`, and +`githubCiConfigReadRoleArns`. + +## Runtime Contract + +`.github/actions/load-aws-ci-env`: + +1. derives the fixed Secrets Manager secret ID from the workflow input suffix; +2. assumes the matching `GitHubCiConfigRead-*` role through GitHub OIDC; +3. reads the JSON payload with AWS CLI; +4. validates required keys and account ID without printing values; +5. exports the validated environment variables for later workflow steps. + +Workflows must not accept account, role, backend, stack, or secret-provider +values from pull request text, issue comments, repository dispatch payloads, or +GitHub Environment variables. + +## Required JSON Keys + +Common keys: + +- `AWS_ACCOUNT_ID` +- `AWS_REGION` +- `PULUMI_BACKEND_URL` +- `PULUMI_SECRETS_PROVIDER` + +Purpose-specific keys: + +- `AWS_PREVIEW_ROLE_ARN` +- `AWS_APPLY_ROLE_ARN` +- `AWS_DRIFT_ROLE_ARN` +- `AWS_OPERATIONS_ALERT_TRIAGE_ROLE_ARN` +- `PULUMI_PREVIEW_STACKS` +- `PULUMI_DRIFT_STACKS` +- `OPERATIONS_ALERT_QUEUE_NAME` +- `OPERATIONS_TOPIC_ARN` +- `OPERATIONS_CLOUDTRAIL_NAME` + +## Operator Runbook + +Follow [AWS Secrets Manager CI cutover manual](aws-secrets-manager-ci-cutover.md) +to refresh AWS CLI credentials, apply Pulumi stacks, populate Secrets Manager +payloads, set GitHub variables, verify privileged CI, and remove legacy GitHub +Environment variables. + +Never paste secret payloads into chat, GitHub issues, workflow logs, docs, or +Pulumi config. 
Use `put-secret-value` with a private local JSON file, and use +`describe-secret` for metadata-only verification. + +## Legacy GitHub Environment Cleanup + +After AWS-only privileged CI is green, run **GitHub Environment Legacy Variable Cleanup** +with `dry_run=true`. The workflow needs a +`GH_ENVIRONMENT_ADMIN_TOKEN` with repository **Environments** write permission. +Review the planned deletion of legacy keys, including old `PULUMI_PR_*` +variables, then rerun with `dry_run=false` and the documented confirmation +sentence from the cutover manual. + +If legacy operations-alert issues need reconciliation and no new queued alert +exists to create a canonical issue, use **Operations Alert Canonical Backfill** +with an SRE-confirmed `stable_event_json` object first. Then use +**Operations Alert Legacy Reconcile** and provide the required +`sre_confirmation_reference`. diff --git a/docs/pulumi-guardrails.md b/docs/pulumi-guardrails.md index 75b66c4..23df799 100644 --- a/docs/pulumi-guardrails.md +++ b/docs/pulumi-guardrails.md @@ -71,9 +71,9 @@ The PR preview workflow also runs the same policy pack during `make test-preview before the destructive diff and IAM validation steps inspect the resulting preview artifact. -`make pulumi-preview` and `make pulumi-up` also enable the policy pack by -default. Before Pulumi starts, the repository checks that the shared -container-managed `uv` environment contains both `pulumi` and +`make pulumi-preview`, `make pulumi-plan`, and `make pulumi-up-plan` also +enable the policy pack by default. Before Pulumi starts, the repository checks +that the shared container-managed `uv` environment contains both `pulumi` and `pulumi-policy`; if the branch changed Python dependencies, the helper resyncs the environment from `uv.lock` and repoints `policy/.venv` at that shared interpreter. 
diff --git a/docs/security-baseline.md b/docs/security-baseline.md index ae6ff63..cdfd620 100644 --- a/docs/security-baseline.md +++ b/docs/security-baseline.md @@ -119,9 +119,9 @@ When you add new workflows: ### Review Before Apply -Use `make pulumi-preview` before `make pulumi-up`, and keep reviewable PRs as -the normal path for infrastructure changes. A preview that is not tied to the -code under review is much harder to trust later. +Use `make pulumi-plan` before `make pulumi-up-plan`, and keep reviewable PRs as +the normal path for infrastructure changes. An apply that is not tied to the +reviewed saved plan is much harder to trust later. ### Prefer Ephemeral Validation Stacks diff --git a/docs/security-operating-evidence.md b/docs/security-operating-evidence.md index bc10b7a..563b847 100644 --- a/docs/security-operating-evidence.md +++ b/docs/security-operating-evidence.md @@ -15,8 +15,8 @@ Current vulnerability-review evidence is retained in | Principal | Current control | Evidence source | Owner | Cadence | Fallback | | --- | --- | --- | --- | --- | --- | -| GitHub Actions preview and IAM validation | Environment-scoped OIDC trust using `repo:VilnaCRM-Org/bootstrap-infrastructure:environment:test` or `prod-preview`; no static AWS keys. | `.github/workflows/pulumi-pr-guardrails.yml`, `docs/github-actions-secrets.md`, `pulumi/infra/iam/github_oidc.py`. | Maintainer plus security reviewer | Per workflow or trust-policy change | Fail privileged jobs when OIDC variables are missing or account ID does not match. | -| GitHub Actions apply | Separate apply role, protected `prod` environment for production, saved-plan manifest, and commit SHA checks. | `.github/workflows/pulumi-test-deploy.yml`, `.github/workflows/pulumi-prod.yml`, `scripts/run_pulumi_command.py`. | Maintainer plus SRE | Per deploy workflow change | Do not apply until the environment, SHA, manifest, destructive diff, and IAM validation evidence match. 
| +| GitHub Actions preview and IAM validation | AWS Secrets Manager holds account-local config; fixed AWS Secrets Manager CI secrets are loaded at runtime; AWS OIDC trust is scoped to fixed repo subjects and workflow refs; no static AWS keys. | `.github/workflows/pulumi-pr-guardrails.yml`, `.github/actions/load-aws-ci-env/action.yml`, `docs/github-actions-secrets.md`, `pulumi/infra/automation.py`. | Maintainer plus security reviewer | Per workflow or trust-policy change | Fail privileged jobs when AWS CI config variables are missing or account ID does not match. | +| GitHub Actions apply | Separate apply role, protected GitHub `prod` Environment for production approval, saved-plan manifest, and commit SHA checks. | `.github/workflows/pulumi-test-deploy.yml`, `.github/workflows/pulumi-prod.yml`, `scripts/run_pulumi_command.py`. | Maintainer plus SRE | Per deploy workflow change | Do not apply until the AWS Secrets Manager CI secret, SHA, manifest, destructive diff, and IAM validation evidence match. | | Local maintainer AWS access | Local credentials are outside the repository and are passed only through explicit Docker environment flags. | `.env` is ignored; `.env.empty` is committed; `docs/security-baseline.md`. | Maintainer | Quarterly | Treat local static keys as an exception requiring external owner approval and rotation evidence. | | Human GitHub administration | Branch rulesets and environments require repository admin rights. | `scripts/configure_github_repository_controls.py` documents the desired state. | Repository admin | Per ruleset or environment change | Keep branch protection and production approval unresolved until GitHub metadata proves the controls. | @@ -24,8 +24,8 @@ Current vulnerability-review evidence is retained in | Role or policy surface | Scope | Boundary | Validation | | --- | --- | --- | --- | -| Preview role | Reads stack state, generates Pulumi previews, runs destructive diff and IAM Access Analyzer validation. 
| GitHub environment OIDC subject and account allow-listing. | Same-repo PR guardrail workflow and `make test-guardrails`. | -| Apply role | Applies saved plans only in `test` or protected `prod`. | GitHub environment approval, commit SHA checks, saved-plan manifest, backend match, and plan hash verification. | `make pulumi-plan`, `make pulumi-up-plan`, workflow tests, and unit coverage. | +| Preview role | Reads stack state, generates Pulumi previews, runs destructive diff and IAM Access Analyzer validation. | Fixed AWS Secrets Manager CI secret, fixed workflow-ref OIDC trust, and account allow-listing. | Same-repo PR guardrail workflow and `make test-guardrails`. | +| Apply role | Applies saved plans only in `test` or protected `prod`. | Fixed AWS Secrets Manager CI secret, protected GitHub `prod` approval for production, commit SHA checks, saved-plan manifest, backend match, and plan hash verification. | `make pulumi-plan`, `make pulumi-up-plan`, workflow tests, and unit coverage. | | Bootstrap automation policy | Manages repository-prefixed S3, KMS, IAM, Backup, ECR, EventBridge, CloudTrail, SNS/SQS, Budgets, Cost Anomaly, GuardDuty, Security Hub, and AWS Config resources. | Resource ARNs, deterministic name prefixes, request/resource tags, service constraints, and policy-pack wildcard checks. | `tests/unit/test_components.py`, `tests/policies/test_policy_pack.py`, `make test-iam-validation` when AWS credentials are available. | | AWS Config recorder role | Allows AWS Config to describe supported resources and write delivery objects to the dedicated Config bucket. | Service principal trust for `config.amazonaws.com` and bucket-prefix policy. | Pulumi unit tests and real preview policy-pack validation. | | Backup role | Allows AWS Backup to protect repository state/log buckets and restore to isolated drill locations. | `iam:PassedToService` condition for AWS Backup plus scoped backup resources. | Restore evidence and backup component tests. 
| @@ -42,7 +42,7 @@ tags, deterministic names, service conditions, and tests. | `sts:GetCallerIdentity` | AWS identity lookup is account metadata and has no resource ARN. | Read-only; used for account evidence and preflight checks. | | `kms:CreateKey` | KMS keys have no ARN before creation. | Requires bootstrap `Environment` and `Purpose` request tags. | | `kms:ListAliases` | KMS alias listing is not resource-scopable. | Follow-up alias changes are scoped to bootstrap aliases and tagged keys. | -| `iam:CreateOpenIDConnectProvider` and `iam:ListOpenIDConnectProviders` | OIDC provider creation/list APIs are account-level. | Trust policy is environment-scoped; provider ARN is deterministic after creation. | +| `iam:CreateOpenIDConnectProvider` and `iam:ListOpenIDConnectProviders` | OIDC provider creation/list APIs are account-level. | Trust policies are scoped to fixed repository subjects, workflow refs, and the protected `prod` environment subject only for production apply; provider ARN is deterministic after creation. | | `ce:*CostAnomaly*` create actions | Cost Explorer anomaly resources require account-level creation APIs. | Requires request tags and later resource-tag conditions where AWS supports them. | | `guardduty:ListDetectors` and `guardduty:CreateDetector` | Detector discovery and creation are account-level. | Detector creation requires bootstrap request tags; management is scoped to account detector ARNs. | | AWS Config delivery channel actions | AWS Config delivery-channel APIs are account/region-level and do not support the same recorder ARN scoping. | Recorder role, delivery bucket, and recorder management are scoped to bootstrap names. | @@ -56,7 +56,7 @@ self-managed by the same bootstrap automation role it is meant to constrain. 
Current compensating controls are: -- environment-scoped GitHub OIDC trust +- AWS Secrets Manager-scoped CI configuration with fixed GitHub OIDC trust subjects - account allow-listing in privileged jobs - deterministic AWS resource names and ARN scopes - request and resource tag conditions where AWS supports them diff --git a/docs/sre-operations.md b/docs/sre-operations.md index 9dccc8a..825aa50 100644 --- a/docs/sre-operations.md +++ b/docs/sre-operations.md @@ -66,14 +66,16 @@ critical resources without requiring live AWS credentials. Keep `make test-iam-validation` for the separate Access Analyzer check when you intentionally have AWS credentials configured. -Apply only after the preview is understood and reviewed: +Apply only through a reviewed saved plan: ```bash -make pulumi-up +make pulumi-plan +make pulumi-up-plan pulumi -C pulumi stack output ``` -`make pulumi-up` uses the same policy-pack enforcement path as preview. +`make pulumi-plan` and `make pulumi-up-plan` use the same policy-pack +enforcement path as preview while preserving the reviewed plan artifact. For drift reconciliation without applying a fresh plan: @@ -149,15 +151,22 @@ omitting `central-logging` during this migration. State bucket logging depends on the concrete central logging bucket resources, so a full-stack preview/apply keeps the logging and state replica changes ordered together. -## GitHub Environment Operations +## AWS CI config and GitHub Environment Operations -The deployment boundary is the GitHub environment: +AWS Secrets Manager is the account-configuration boundary. 
The fixed CI suffixes +load these AWS Secrets Manager JSON secrets: -- `test` handles trusted PR previews, main-branch test applies, and test drift -- `prod-preview` handles production preview and drift without production apply - permissions -- `prod` handles production apply and must require reviewers plus deployment - branch restrictions +- `/bootstrap-infrastructure/ci/test-pr` handles trusted PR previews, + IAM validation, and same-repo PR evidence collection +- `/bootstrap-infrastructure/ci/test` handles main-branch test + applies, test drift, operations alert triage, and evidence collection +- `/bootstrap-infrastructure/ci/prod-preview` handles production + preview and drift without production apply permissions +- `/bootstrap-infrastructure/ci/prod` handles production apply values + +The approval boundary is the protected GitHub `prod` Environment. It must +require reviewers plus deployment branch restrictions. Do not use GitHub +`test` or `prod-preview` Environments for privileged account variables. Before approving `prod`, compare the reviewed commit SHA with the apply SHA and review the preview summary, destructive diff result, IAM validation result, AWS @@ -165,9 +174,10 @@ account evidence, stack name, and role purpose. Do not approve a production apply from a different SHA than the preview you reviewed. Privileged runs should preserve evidence that is useful but not sensitive: -GitHub environment, account ID, region, OIDC role purpose, backend type, stack -names, guardrail mode, commit SHA, and artifact names. Evidence must not include -stack exports, decrypted secret values, access keys, tokens, or private keys. +AWS Secrets Manager secret ID, account ID, region, OIDC role purpose, backend +type, stack names, guardrail mode, commit SHA, and artifact names. Evidence must +not include stack exports, decrypted secret values, access keys, tokens, or +private keys. 
## Safe AWS Validation diff --git a/docs/testing.md b/docs/testing.md index f7c1368..da5bcf0 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -214,7 +214,8 @@ Coverage: - `make all` - `make start` - `make pulumi-preview` -- `make pulumi-up` +- `make pulumi-plan` +- `make pulumi-up-plan` - `make pulumi-refresh` - `make pulumi-destroy` - `make sh` diff --git a/pulumi/__main__.py b/pulumi/__main__.py index 607b014..ae48a1a 100644 --- a/pulumi/__main__.py +++ b/pulumi/__main__.py @@ -51,6 +51,8 @@ security_account_controls = bootstrap.security_account_controls if bootstrap.automation is not None: automation = bootstrap.automation + if bootstrap.ci_config is not None: + ci_config = bootstrap.ci_config pulumi.export("centralLogBucket", bootstrap.outputs["centralLogBucket"]) pulumi.export("centralLogBucketArn", bootstrap.outputs["centralLogBucketArn"]) @@ -61,6 +63,19 @@ pulumi_secrets_provider_urls = bootstrap.outputs["pulumiSecretsProviderUrls"] pulumi.export("pulumiSecretsProviderUrls", pulumi_secrets_provider_urls) pulumi.export("deployRoleArns", bootstrap.outputs["deployRoleArns"]) + if bootstrap.ci_config is not None: + pulumi.export( + "ciConfigurationSecretIds", + bootstrap.outputs["ciConfigurationSecretIds"], + ) + pulumi.export( + "ciConfigurationSecretArns", + bootstrap.outputs["ciConfigurationSecretArns"], + ) + pulumi.export( + "githubCiConfigReadRoleArns", + bootstrap.outputs["githubCiConfigReadRoleArns"], + ) managed_repository_projects = bootstrap.outputs["managedRepositoryProjects"] pulumi.export("managedRepositoryProjects", managed_repository_projects) managed_repository_metadata = bootstrap.outputs["managedRepositoryMetadata"] diff --git a/pulumi/infra/__init__.py b/pulumi/infra/__init__.py index d14dbba..a9ec36c 100644 --- a/pulumi/infra/__init__.py +++ b/pulumi/infra/__init__.py @@ -5,6 +5,7 @@ from .bootstrap_dependencies import BootstrapInfrastructureDependencies from .bootstrap_infrastructure import BootstrapInfrastructure from 
.bootstrap_settings import BootstrapSettings +from .ci_config import CiConfiguration from .cost_controls import CostControlInputs, CostControls from .logging_bucket import CentralLoggingBuckets from .managed_repository import ManagedRepository @@ -18,6 +19,7 @@ "BootstrapInfrastructure", "BootstrapInfrastructureDependencies", "BootstrapSettings", + "CiConfiguration", "CostControlInputs", "CostControls", "GitHubAutomation", diff --git a/pulumi/infra/automation.py b/pulumi/infra/automation.py index fab5341..a39acf6 100644 --- a/pulumi/infra/automation.py +++ b/pulumi/infra/automation.py @@ -44,6 +44,8 @@ "ManageBootstrapIam", "CreateBootstrapOidcProvider", "ListBootstrapOidcProviders", + "CreateBootstrapCiSecrets", + "ManageBootstrapCiSecrets", "PassBootstrapRolesToBackup", "PassBootstrapRolesToConfig", "ManageBootstrapBackup", @@ -266,6 +268,18 @@ "sqs:ReceiveMessage", "sqs:DeleteMessage", ) +_AUTOMATION_SECRETS_MANAGER_CREATE_ACTIONS = ( + "secretsmanager:CreateSecret", + "secretsmanager:TagResource", +) +_AUTOMATION_SECRETS_MANAGER_RESOURCE_ACTIONS = ( + "secretsmanager:DeleteSecret", + "secretsmanager:DescribeSecret", + "secretsmanager:ListSecretVersionIds", + "secretsmanager:RestoreSecret", + "secretsmanager:TagResource", + "secretsmanager:UntagResource", +) _AUTOMATION_BUDGETS_ACTIONS = ( "budgets:ModifyBudget", "budgets:DescribeBudget", @@ -475,9 +489,24 @@ def _automation_iam_role_resources( settings, repo_name, ) + repo_part = settings.sanitize_bucket_component(repo_name, "repoSlug").replace( + ".", + "-", + ) + ci_config_read_role_resources = [] + for suffix in _automation_ci_secret_suffixes(settings.environment): + safe_suffix = settings.sanitize_bucket_component( + suffix, + "ciConfigSuffix", + ).replace(".", "-") + ci_config_read_role_resources.append( + f"arn:aws:iam::{account_id}:role/GitHubCiConfigRead-" + f"{repo_part}-{safe_suffix}" + ) return [ f"arn:aws:iam::{account_id}:role/{automation_role_name}", 
f"arn:aws:iam::{account_id}:role/{operations_alert_triage_role_name}", + *ci_config_read_role_resources, f"arn:aws:iam::{account_id}:role/PulumiDeploy-*", f"arn:aws:iam::{account_id}:role/PulumiStateRepl-*", f"arn:aws:iam::{account_id}:role/central-logging-replication-role-*", @@ -531,6 +560,30 @@ def _automation_sqs_resources( return [f"arn:aws:sqs:*:{account_id}:bootstrap-{environment}-operations-alerts"] +def _automation_ci_secret_suffixes(environment: str) -> tuple[str, ...]: + """Return CI secret suffixes owned by one bootstrap stack.""" + return { + "test": ("test-pr", "test"), + "prod": ("prod-preview", "prod"), + }.get(environment, (environment,)) + + +def _automation_ci_secret_resources( + account_id: str, + settings: BootstrapSettings, + repo_name: str, +) -> list[str]: + """Scope Secrets Manager management to CI config secret containers.""" + repo_part = settings.sanitize_bucket_component(repo_name, "repoSlug").replace( + ".", + "-", + ) + return [ + f"arn:aws:secretsmanager:*:{account_id}:secret:/{repo_part}/ci/{suffix}-*" + for suffix in _automation_ci_secret_suffixes(settings.environment) + ] + + def _automation_budget_resources( account_id: str, settings: BootstrapSettings ) -> list[str]: @@ -590,9 +643,22 @@ def _automation_aws_config_recorder_resources( def _automation_assume_role_policy( - oidc_provider_arn: str, org: str, repo_name: str, environment: str + oidc_provider_arn: str, + org: str, + repo_name: str, + production_environment: str, + branch_name: str, ) -> str: - """Build the GitHub OIDC trust policy for environment-scoped automation.""" + """Build the GitHub OIDC trust policy for fixed workflow automation.""" + workflow_prefix = f"{org}/{repo_name}/.github/workflows" + if production_environment == "prod": + subjects = [f"repo:{org}/{repo_name}:environment:{production_environment}"] + else: + subjects = [ + f"repo:{org}/{repo_name}:ref:refs/heads/{branch_name}", + f"repo:{org}/{repo_name}:pull_request", + ] + return json.dumps( { 
"Version": "2012-10-17", @@ -606,10 +672,33 @@ def _automation_assume_role_policy( "token.actions.githubusercontent.com:aud": ( "sts.amazonaws.com" ), - "token.actions.githubusercontent.com:sub": ( - f"repo:{org}/{repo_name}:environment:{environment}" - ), - } + "token.actions.githubusercontent.com:sub": subjects, + }, + "StringLike": { + "token.actions.githubusercontent.com:job_workflow_ref": [ + (f"{workflow_prefix}/pulumi-pr-guardrails.yml@refs/*"), + ( + f"{workflow_prefix}/pulumi-test-deploy.yml" + f"@refs/heads/{branch_name}" + ), + ( + f"{workflow_prefix}/nightly-guardrails.yml" + f"@refs/heads/{branch_name}" + ), + ( + f"{workflow_prefix}/pulumi-prod.yml" + f"@refs/heads/{branch_name}" + ), + ( + f"{workflow_prefix}/pulumi-pr-command-runner.yml" + f"@refs/heads/{branch_name}" + ), + ( + f"{workflow_prefix}/well-architected-evidence.yml" + "@refs/*" + ), + ] + }, }, } ], @@ -643,7 +732,6 @@ def _operations_alert_triage_assume_role_policy( oidc_provider_arn: str, org: str, repo_name: str, - environment: str, branch_name: str, ) -> str: """Build the OIDC trust policy for the alert triage workflow only.""" @@ -661,14 +749,16 @@ def _operations_alert_triage_assume_role_policy( "sts.amazonaws.com" ), "token.actions.githubusercontent.com:sub": ( - f"repo:{org}/{repo_name}:environment:{environment}" + f"repo:{org}/{repo_name}:ref:refs/heads/{branch_name}" ), + }, + "StringLike": { "token.actions.githubusercontent.com:job_workflow_ref": ( f"{org}/{repo_name}/.github/workflows/" "operations-alert-triage.yml" f"@refs/heads/{branch_name}" ), - } + }, }, } ], @@ -700,10 +790,15 @@ def _automation_policy( account_id: str, settings: BootstrapSettings, repo_name: str ) -> str: """Return the policy used by GitHub automation for bootstrap operations.""" - oidc_provider_arn = ( + github_oidc_provider_arn = ( f"arn:aws:iam::{account_id}:oidc-provider/token.actions.githubusercontent.com" ) iam_role_resources = _automation_iam_role_resources(account_id, settings, repo_name) + 
ci_secret_resources = _automation_ci_secret_resources( + account_id, + settings, + repo_name, + ) kms_purposes = ["pulumi-secrets", "operations-alerting", "operations-cloudtrail"] kms_tag_condition = { "StringEquals": { @@ -717,6 +812,18 @@ def _automation_policy( AWS_REQUEST_TAG_PURPOSE_KEY: kms_purposes, } } + ci_secret_request_tag_condition = { + "StringEquals": { + AWS_REQUEST_TAG_ENVIRONMENT_KEY: settings.environment, + AWS_REQUEST_TAG_PURPOSE_KEY: "ci-configuration", + } + } + ci_secret_resource_tag_condition = { + "StringEquals": { + AWS_RESOURCE_TAG_ENVIRONMENT_KEY: settings.environment, + AWS_RESOURCE_TAG_PURPOSE_KEY: "ci-configuration", + } + } kms_alias_condition = { "StringEqualsIfExists": { AWS_RESOURCE_TAG_ENVIRONMENT_KEY: settings.environment, @@ -833,7 +940,10 @@ def _automation_policy( "iam:UpdateAssumeRolePolicy", "iam:UpdateOpenIDConnectProviderThumbprint", ], - "Resource": [*iam_role_resources, oidc_provider_arn], + "Resource": [ + *iam_role_resources, + github_oidc_provider_arn, + ], }, { "Sid": "CreateBootstrapOidcProvider", @@ -847,6 +957,20 @@ def _automation_policy( "Action": ["iam:ListOpenIDConnectProviders"], "Resource": "*", }, + { + "Sid": "CreateBootstrapCiSecrets", + "Effect": "Allow", + "Action": list(_AUTOMATION_SECRETS_MANAGER_CREATE_ACTIONS), + "Resource": ci_secret_resources, + "Condition": ci_secret_request_tag_condition, + }, + { + "Sid": "ManageBootstrapCiSecrets", + "Effect": "Allow", + "Action": list(_AUTOMATION_SECRETS_MANAGER_RESOURCE_ACTIONS), + "Resource": ci_secret_resources, + "Condition": ci_secret_resource_tag_condition, + }, { "Sid": "PassBootstrapRolesToBackup", "Effect": "Allow", @@ -1278,6 +1402,7 @@ def _create_automation_role( context.settings.org, context.repo_name, context.settings.environment, + context.settings.github_branch or "main", ), ), tags=_automation_tags( @@ -1424,7 +1549,6 @@ def _create_operations_alert_triage_role( arn, context.settings.org, context.repo_name, - context.settings.environment, 
branch_name, ), ), diff --git a/pulumi/infra/bootstrap_dependencies.py b/pulumi/infra/bootstrap_dependencies.py index e5a335b..27bf282 100644 --- a/pulumi/infra/bootstrap_dependencies.py +++ b/pulumi/infra/bootstrap_dependencies.py @@ -6,6 +6,7 @@ from .automation import GitHubAutomation from .backup import S3BackupPlan +from .ci_config import CiConfiguration from .cost_controls import CostControls from .iam import GitHubOidcRoles from .logging_bucket import CentralLoggingBuckets @@ -23,6 +24,7 @@ class BootstrapInfrastructureDependencies: state_buckets_cls: type[PulumiStateBuckets] = PulumiStateBuckets secrets_keys_cls: type[PulumiSecretsKeys] = PulumiSecretsKeys oidc_roles_cls: type[GitHubOidcRoles] = GitHubOidcRoles + ci_config_cls: type[CiConfiguration] = CiConfiguration automation_cls: type[GitHubAutomation] = GitHubAutomation backup_plan_cls: type[S3BackupPlan] = S3BackupPlan monitoring_cls: type[OperationsMonitoring] = OperationsMonitoring diff --git a/pulumi/infra/bootstrap_infrastructure.py b/pulumi/infra/bootstrap_infrastructure.py index 9a882e2..b3f88a5 100644 --- a/pulumi/infra/bootstrap_infrastructure.py +++ b/pulumi/infra/bootstrap_infrastructure.py @@ -2,14 +2,161 @@ from __future__ import annotations +from collections.abc import Sequence + import pulumi from .bootstrap_dependencies import BootstrapInfrastructureDependencies from .bootstrap_settings import BootstrapSettings from .cost_controls import CostControlInputs +from .managed_repository import ManagedRepository from .repository_catalog import ManagedRepositoryCatalog +def _repository_project( + repositories: Sequence[ManagedRepository], + repository_name: str, +) -> str: + """Return the catalog project for a repository, falling back to its name.""" + for repository in repositories: + if repository.name == repository_name: + return repository.project_name + return repository_name + + +def _create_ci_config( + *, + bootstrap, + dependencies: BootstrapInfrastructureDependencies, + settings: 
BootstrapSettings, + opts: pulumi.ResourceOptions, +): + """Create AWS Secrets Manager CI config resources when a runner repo exists.""" + if not settings.repo: + return None + return dependencies.ci_config_cls( + "ci-configuration", + settings=settings, + oidc_provider_arn=bootstrap.oidc.provider.arn, + opts=opts, + ) + + +def _create_automation( + *, + bootstrap, + repositories: Sequence[ManagedRepository], + opts: pulumi.ResourceOptions, +): + """Create the bootstrap automation role for repo-scoped stacks.""" + settings = bootstrap.settings + if not settings.repo: + return None + return bootstrap.dependencies.automation_cls( + "github-automation", + settings=settings, + repository_project=_repository_project(repositories, settings.repo), + oidc_provider_arn=bootstrap.oidc.provider.arn, + opts=opts, + ) + + +def _automation_policy_dependencies(automation) -> list[pulumi.Resource]: + """Return resources that policy-driven controls must wait for.""" + if automation is None: + return [] + return list(automation.policy_dependencies) + + +def _base_outputs( + bootstrap, + repository_catalog: ManagedRepositoryCatalog, +) -> dict[str, pulumi.Input[object]]: + """Return outputs emitted by every bootstrap stack.""" + return { + "centralLogBucket": bootstrap.logging.bucket.bucket, + "centralLogBucketArn": bootstrap.logging.bucket.arn, + "pulumiStateBuckets": bootstrap.state.state_buckets, + "pulumiBackendUrls": bootstrap.state.backend_urls, + "pulumiSecretsKeyArns": bootstrap.secrets.key_arns, + "pulumiSecretsAliases": bootstrap.secrets.alias_names, + "pulumiSecretsProviderUrls": bootstrap.secrets.provider_urls, + "deployRoleArns": bootstrap.oidc.deploy_role_arns, + "managedRepositoryProjects": repository_catalog.project_mapping(), + "managedRepositoryMetadata": repository_catalog.metadata_mapping(), + "backupVaultName": bootstrap.backup.vault.name, + "backupVaultArn": bootstrap.backup.vault.arn, + "backupRoleArn": bootstrap.backup.role.arn, + "operationsAlertTopicArn": 
bootstrap.monitoring.topic.arn, + "operationsCloudTrailBucketName": bootstrap.monitoring.cloudtrail_bucket_name, + "operationsCloudTrailName": bootstrap.monitoring.cloudtrail_name, + "operationsAlertRuleNames": { + suffix: rule.name for suffix, rule in bootstrap.monitoring.rules.items() + }, + "operationsAlertTopicKeyAliasName": (bootstrap.monitoring.topic_key_alias.name), + "operationsAlertQueueArn": bootstrap.monitoring.alert_queue.arn, + "operationsAlertQueueName": bootstrap.monitoring.alert_queue.name, + "operationsAlertQueueUrl": bootstrap.monitoring.alert_queue.url, + "operationsAlertQueueSubscriptionArn": ( + bootstrap.monitoring.alert_queue_subscription.arn + ), + "monthlyBudgetName": bootstrap.cost_controls.monthly_budget.name, + "costAnomalyMonitorArn": bootstrap.cost_controls.anomaly_monitor_arn, + "costAnomalySubscriptionArn": ( + bootstrap.cost_controls.anomaly_subscription.arn + ), + "guardDutyDetectorId": ( + bootstrap.security_account_controls.guardduty_detector.id + ), + "securityHubAccountArn": ( + bootstrap.security_account_controls.security_hub_account.arn + ), + "awsConfigRecorderName": ( + bootstrap.security_account_controls.config_recorder.name + ), + "awsConfigDeliveryChannelName": ( + bootstrap.security_account_controls.config_delivery_channel.name + ), + "awsConfigDeliveryBucketName": ( + bootstrap.security_account_controls.config_bucket.bucket + ), + } + + +def _automation_outputs(automation) -> dict[str, pulumi.Input[object]]: + """Return outputs for repo-scoped GitHub automation resources.""" + if automation is None: + return {} + return { + "automationRoleArn": automation.role.arn, + "operationsAlertTriageRoleArn": (automation.operations_alert_triage_role.arn), + "runnerRepositoryName": automation.repository.name, + "runnerRepositoryUrl": automation.repository.repository_url, + } + + +def _ci_config_outputs(ci_config) -> dict[str, pulumi.Input[object]]: + """Return outputs for AWS Secrets Manager backed CI config resources.""" + if 
ci_config is None: + return {} + return { + "ciConfigurationSecretIds": ci_config.secret_ids, + "ciConfigurationSecretArns": ci_config.secret_arns, + "githubCiConfigReadRoleArns": ci_config.read_role_arns, + } + + +def _bootstrap_outputs( + bootstrap, + repository_catalog: ManagedRepositoryCatalog, +) -> dict[str, pulumi.Input[object]]: + """Return the complete stack output map.""" + outputs = _base_outputs(bootstrap, repository_catalog) + outputs.update(_automation_outputs(bootstrap.automation)) + outputs.update(_ci_config_outputs(bootstrap.ci_config)) + return outputs + + class BootstrapInfrastructure(pulumi.ComponentResource): """Compose the full bootstrap infrastructure from injected dependencies.""" @@ -59,29 +206,20 @@ def __init__( secrets_key_arns=self.secrets.key_arns, opts=child_opts, ) - self.automation = None - automation_policy_dependencies: list[pulumi.Resource] = [] - if settings.repo: - automation_repository = next( - ( - repository - for repository in repositories - if repository.name == settings.repo - ), - None, - ) - self.automation = self.dependencies.automation_cls( - "github-automation", - settings=settings, - repository_project=( - automation_repository.project_name - if automation_repository is not None - else settings.repo - ), - oidc_provider_arn=self.oidc.provider.arn, - opts=child_opts, - ) - automation_policy_dependencies.extend(self.automation.policy_dependencies) + self.ci_config = _create_ci_config( + bootstrap=self, + dependencies=self.dependencies, + settings=settings, + opts=child_opts, + ) + self.automation = _create_automation( + bootstrap=self, + repositories=repositories, + opts=child_opts, + ) + automation_policy_dependencies = _automation_policy_dependencies( + self.automation + ) backup_targets = [self.logging.bucket.arn, *self.state.bucket_arns.values()] self.backup = self.dependencies.backup_plan_cls( @@ -115,62 +253,5 @@ def __init__( ) ) - self.outputs: dict[str, pulumi.Input[object]] = { - "centralLogBucket": 
self.logging.bucket.bucket, - "centralLogBucketArn": self.logging.bucket.arn, - "pulumiStateBuckets": self.state.state_buckets, - "pulumiBackendUrls": self.state.backend_urls, - "pulumiSecretsKeyArns": self.secrets.key_arns, - "pulumiSecretsAliases": self.secrets.alias_names, - "pulumiSecretsProviderUrls": self.secrets.provider_urls, - "deployRoleArns": self.oidc.deploy_role_arns, - "managedRepositoryProjects": repository_catalog.project_mapping(), - "managedRepositoryMetadata": repository_catalog.metadata_mapping(), - "backupVaultName": self.backup.vault.name, - "backupVaultArn": self.backup.vault.arn, - "backupRoleArn": self.backup.role.arn, - "operationsAlertTopicArn": self.monitoring.topic.arn, - "operationsCloudTrailBucketName": self.monitoring.cloudtrail_bucket_name, - "operationsCloudTrailName": self.monitoring.cloudtrail_name, - "operationsAlertRuleNames": { - suffix: rule.name for suffix, rule in self.monitoring.rules.items() - }, - "operationsAlertTopicKeyAliasName": self.monitoring.topic_key_alias.name, - "operationsAlertQueueArn": self.monitoring.alert_queue.arn, - "operationsAlertQueueName": self.monitoring.alert_queue.name, - "operationsAlertQueueUrl": self.monitoring.alert_queue.url, - "operationsAlertQueueSubscriptionArn": ( - self.monitoring.alert_queue_subscription.arn - ), - "monthlyBudgetName": self.cost_controls.monthly_budget.name, - "costAnomalyMonitorArn": self.cost_controls.anomaly_monitor_arn, - "costAnomalySubscriptionArn": (self.cost_controls.anomaly_subscription.arn), - "guardDutyDetectorId": ( - self.security_account_controls.guardduty_detector.id - ), - "securityHubAccountArn": ( - self.security_account_controls.security_hub_account.arn - ), - "awsConfigRecorderName": ( - self.security_account_controls.config_recorder.name - ), - "awsConfigDeliveryChannelName": ( - self.security_account_controls.config_delivery_channel.name - ), - "awsConfigDeliveryBucketName": ( - self.security_account_controls.config_bucket.bucket - ), - } - if 
self.automation is not None: - self.outputs.update( - { - "automationRoleArn": self.automation.role.arn, - "operationsAlertTriageRoleArn": ( - self.automation.operations_alert_triage_role.arn - ), - "runnerRepositoryName": self.automation.repository.name, - "runnerRepositoryUrl": self.automation.repository.repository_url, - } - ) - + self.outputs = _bootstrap_outputs(self, repository_catalog) self.register_outputs(self.outputs) diff --git a/pulumi/infra/ci_config.py b/pulumi/infra/ci_config.py new file mode 100644 index 0000000..d48d535 --- /dev/null +++ b/pulumi/infra/ci_config.py @@ -0,0 +1,334 @@ +"""AWS-side resources that back GitHub Actions CI configuration.""" + +from __future__ import annotations + +import json +from collections.abc import Sequence + +import pulumi_aws as aws + +import pulumi + +from .bootstrap_settings import BootstrapSettings +from .config import settings as default_settings +from .utils.outputs import apply_output +from .utils.tags import base_tags + +CI_CONFIG_SECRET_SUFFIXES_BY_STACK = { + "test": ("test-pr", "test"), + "prod": ("prod-preview", "prod"), +} + + +def _ci_config_project(settings: BootstrapSettings) -> str: + """Return the repository project name used in CI secret IDs.""" + if not settings.repo: + raise ValueError("repoSlug config is required for AWS CI configuration.") + return settings.sanitize_bucket_component(settings.repo, "repoSlug").replace( + ".", + "-", + ) + + +def _ci_secret_suffixes(environment: str) -> tuple[str, ...]: + """Return CI secret suffixes owned by one bootstrap stack.""" + return CI_CONFIG_SECRET_SUFFIXES_BY_STACK.get(environment, (environment,)) + + +def _ci_secret_id(settings: BootstrapSettings, suffix: str) -> str: + """Return the AWS Secrets Manager secret ID used by one CI configuration.""" + project = _ci_config_project(settings) + return f"/{project}/ci/{suffix}" + + +def _ci_secret_arn_patterns( + *, + account_id: str, + partition: str, + settings: BootstrapSettings, + suffixes: 
Sequence[str], +) -> list[str]: + """Return ARN patterns for Secrets Manager secrets with random suffixes.""" + return [ + f"arn:{partition}:secretsmanager:*:{account_id}:secret:" + f"{_ci_secret_id(settings, suffix)}-*" + for suffix in suffixes + ] + + +def _ci_config_read_role_name(settings: BootstrapSettings, suffix: str) -> str: + """Return the GitHub OIDC role name allowed to read one CI secret.""" + project = _ci_config_project(settings) + safe_suffix = settings.sanitize_bucket_component(suffix, "ciConfigSuffix").replace( + ".", + "-", + ) + name = f"GitHubCiConfigRead-{project}-{safe_suffix}" + if len(name) > 64: + raise ValueError( + "Combined repo/CI suffix produce GitHub CI config read role name " + f"'{name}' longer than 64 characters." + ) + return name + + +def _github_actions_subjects(settings: BootstrapSettings, suffix: str) -> list[str]: + """Return allowed GitHub OIDC subject claims for one CI config suffix.""" + if not settings.repo: + raise ValueError("repoSlug config is required for GitHub OIDC subjects.") + branch = settings.github_branch or "main" + repo = f"{settings.org}/{settings.repo}" + if suffix == "test-pr": + return [f"repo:{repo}:pull_request"] + if suffix == "prod": + return [f"repo:{repo}:environment:prod"] + return [f"repo:{repo}:ref:refs/heads/{branch}"] + + +def _github_actions_workflow_refs( + settings: BootstrapSettings, + suffix: str, +) -> list[str]: + """Return allowed workflow refs for one CI config suffix.""" + if not settings.repo: + raise ValueError("repoSlug config is required for GitHub workflow refs.") + branch = settings.github_branch or "main" + workflow_prefix = f"{settings.org}/{settings.repo}/.github/workflows" + workflow_refs_by_suffix = { + "test-pr": [ + f"{workflow_prefix}/pulumi-pr-guardrails.yml@refs/*", + f"{workflow_prefix}/well-architected-evidence.yml@refs/*", + ], + "test": [ + f"{workflow_prefix}/pulumi-pr-guardrails.yml@refs/*", + f"{workflow_prefix}/pulumi-test-deploy.yml@refs/heads/{branch}", + 
f"{workflow_prefix}/nightly-guardrails.yml@refs/heads/{branch}", + f"{workflow_prefix}/pulumi-pr-command-runner.yml@refs/heads/{branch}", + f"{workflow_prefix}/operations-alert-triage.yml@refs/heads/{branch}", + f"{workflow_prefix}/well-architected-evidence.yml@refs/heads/{branch}", + ], + "prod-preview": [ + f"{workflow_prefix}/pulumi-prod.yml@refs/heads/{branch}", + f"{workflow_prefix}/nightly-guardrails.yml@refs/heads/{branch}", + f"{workflow_prefix}/pulumi-pr-command-runner.yml@refs/heads/{branch}", + ], + "prod": [ + f"{workflow_prefix}/pulumi-prod.yml@refs/heads/{branch}", + f"{workflow_prefix}/pulumi-pr-command-runner.yml@refs/heads/{branch}", + ], + } + return workflow_refs_by_suffix.get( + suffix, + [f"{workflow_prefix}/*.yml@refs/heads/{branch}"], + ) + + +def _ci_config_read_assume_role_policy( + provider_arn: str, + settings: BootstrapSettings, + suffix: str, +) -> str: + """Return trust policy for the GitHub AWS CI config read role.""" + return json.dumps( + { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": {"Federated": provider_arn}, + "Action": "sts:AssumeRoleWithWebIdentity", + "Condition": { + "StringEquals": { + "token.actions.githubusercontent.com:aud": ( + "sts.amazonaws.com" + ), + "token.actions.githubusercontent.com:sub": ( + _github_actions_subjects(settings, suffix) + ), + }, + "StringLike": { + "token.actions.githubusercontent.com:job_workflow_ref": ( + _github_actions_workflow_refs(settings, suffix) + ) + }, + }, + } + ], + }, + sort_keys=True, + ) + + +def _ci_config_read_policy( + *, + account_id: str, + partition: str, + settings: BootstrapSettings, + suffixes: Sequence[str], +) -> str: + """Return least-privilege policy for GitHub to read CI secret payloads.""" + return json.dumps( + { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "ReadCiConfigurationSecrets", + "Effect": "Allow", + "Action": [ + "secretsmanager:DescribeSecret", + "secretsmanager:GetSecretValue", + ], + "Resource": 
_ci_secret_arn_patterns( + account_id=account_id, + partition=partition, + settings=settings, + suffixes=suffixes, + ), + } + ], + }, + sort_keys=True, + ) + + +def _is_missing_lookup_error(message: str, markers: tuple[str, ...]) -> bool: + """Return True when an AWS lookup error means the resource is absent.""" + return ( + any(marker in message for marker in markers) + or "not found" in message.lower() + or "couldn't find resource" in message + or "empty result" in message + ) + + +def _secret_exists(name: str) -> bool: + """Return True when a Secrets Manager secret already exists.""" + try: + secret = aws.secretsmanager.get_secret(name=name) + except Exception as exc: + if _is_missing_lookup_error( + str(exc), + ("ResourceNotFoundException", "ResourceNotFound"), + ): + return False + raise + return bool(getattr(secret, "arn", None)) + + +def _iam_role_exists(name: str) -> bool: + """Return True when the IAM role already exists.""" + try: + role = aws.iam.get_role(name=name) + except Exception as exc: + if _is_missing_lookup_error( + str(exc), + ("NoSuchEntity", "NoSuchEntityException"), + ): + return False + raise + return bool(getattr(role, "arn", None)) + + +class CiConfiguration(pulumi.ComponentResource): + """Provision AWS resources GitHub Actions uses for CI configuration.""" + + def __init__( + self, + name: str, + *, + settings: BootstrapSettings | None = None, + oidc_provider_arn: pulumi.Input[str] | None = None, + opts: pulumi.ResourceOptions | None = None, + ) -> None: + super().__init__("bootstrap:ci:CiConfiguration", name, None, opts) + + self._settings = settings or default_settings + provider_arn = oidc_provider_arn or self._settings.github_oidc_provider_arn + if provider_arn is None: + raise ValueError( + "githubOidcProviderArn config is required for AWS CI configuration." 
+ ) + + suffixes = _ci_secret_suffixes(self._settings.environment) + account_id = aws.get_caller_identity().account_id + partition = aws.get_partition().partition + + self.secret_ids: dict[str, str] = {} + self.secret_arns: dict[str, pulumi.Output[str]] = {} + self.read_roles: dict[str, aws.iam.Role] = {} + self.read_role_arns: dict[str, pulumi.Output[str]] = {} + self.read_policies: dict[str, aws.iam.RolePolicy] = {} + + for suffix in suffixes: + secret_id = _ci_secret_id(self._settings, suffix) + secret = aws.secretsmanager.Secret( + f"{name}-secret-{suffix}", + name=secret_id, + description=( + "AWS Secrets Manager source-of-truth JSON for " + f"{_ci_config_project(self._settings)}/{suffix} CI." + ), + recovery_window_in_days=30, + tags=base_tags( + { + "Purpose": "ci-configuration", + "CiConfigSuffix": suffix, + "Repository": _ci_config_project(self._settings), + }, + settings=self._settings, + ), + opts=pulumi.ResourceOptions( + parent=self, + import_=secret_id if _secret_exists(secret_id) else None, + ), + ) + self.secret_ids[suffix] = secret_id + self.secret_arns[suffix] = secret.arn + + role_name = _ci_config_read_role_name(self._settings, suffix) + role = aws.iam.Role( + f"{name}-github-ci-config-read-role-{suffix}", + name=role_name, + assume_role_policy=apply_output( + pulumi.Output.from_input(provider_arn), + lambda arn, ci_suffix=suffix: _ci_config_read_assume_role_policy( + arn, + self._settings, + ci_suffix, + ), + ), + tags=base_tags( + { + "Purpose": "github-ci-configuration-read", + "CiConfigSuffix": suffix, + }, + settings=self._settings, + ), + opts=pulumi.ResourceOptions( + parent=self, + import_=role_name if _iam_role_exists(role_name) else None, + ), + ) + self.read_roles[suffix] = role + self.read_role_arns[suffix] = role.arn + policy = aws.iam.RolePolicy( + f"{name}-github-ci-config-read-policy-{suffix}", + name=f"{role_name}-policy", + role=role.id, + policy=_ci_config_read_policy( + account_id=account_id, + partition=partition, + 
settings=self._settings, + suffixes=(suffix,), + ), + opts=pulumi.ResourceOptions(parent=self), + ) + self.read_policies[suffix] = policy + + self.register_outputs( + { + "secret_ids": self.secret_ids, + "secret_arns": self.secret_arns, + "read_role_arns": self.read_role_arns, + } + ) diff --git a/scripts/_github_repository_controls.py b/scripts/_github_repository_controls.py index ee49fb2..d4f38f9 100644 --- a/scripts/_github_repository_controls.py +++ b/scripts/_github_repository_controls.py @@ -32,6 +32,8 @@ "Test Account Evidence", ) +OPERATIONS_ALERT_RECONCILE_ENVIRONMENT = "operations-alert-reconcile" + def required_status_checks_rule() -> dict[str, object]: """Return the ruleset rule that enforces the documented PR gates.""" @@ -89,8 +91,8 @@ def ruleset_payload(existing_rules: Sequence[Mapping[str, Any]] = ()) -> dict[st } -def prod_environment_payload(reviewer_id: int) -> dict[str, Any]: - """Build the protected production GitHub environment payload.""" +def protected_reviewer_environment_payload(reviewer_id: int) -> dict[str, Any]: + """Build a protected GitHub environment payload requiring one reviewer.""" return { "wait_timer": 0, "prevent_self_review": True, @@ -102,6 +104,18 @@ def prod_environment_payload(reviewer_id: int) -> dict[str, Any]: } +def prod_environment_payload(reviewer_id: int) -> dict[str, Any]: + """Build the protected production GitHub environment payload.""" + return protected_reviewer_environment_payload(reviewer_id) + + +def operations_alert_reconcile_environment_payload( + reviewer_id: int, +) -> dict[str, Any]: + """Build the protected operations-alert reconcile environment payload.""" + return protected_reviewer_environment_payload(reviewer_id) + + def required_status_check_items(rule: object) -> Sequence[object]: """Return required status check items from one ruleset rule.""" if not isinstance(rule, Mapping) or rule.get("type") != "required_status_checks": @@ -249,28 +263,48 @@ def reviewer_ids_from_items(items: 
Sequence[object]) -> set[int]: return reviewer_ids -def prod_environment_verification_blockers( - environment: Mapping[str, Any] | None, reviewer_id: int +def protected_environment_verification_blockers( + environment: Mapping[str, Any] | None, + reviewer_id: int, + *, + label: str, ) -> list[str]: - """Return blockers when the prod environment does not match expectations.""" + """Return blockers when a protected environment does not match expectations.""" if environment is None: - return ["Production environment was not readable after apply."] + return [f"{label} was not readable after apply."] blockers: list[str] = [] if not environment_prevents_self_review(environment): - blockers.append("Production environment does not prevent self-review.") + blockers.append(f"{label} does not prevent self-review.") branch_policy = environment.get("deployment_branch_policy") if not isinstance(branch_policy, Mapping): - blockers.append("Production environment does not report a branch policy.") + blockers.append(f"{label} does not report a branch policy.") elif ( branch_policy.get("protected_branches") is not True or branch_policy.get("custom_branch_policies") is not False ): - blockers.append( - "Production environment does not restrict deployments to protected " - "branches." - ) + blockers.append(f"{label} does not restrict deployments to protected branches.") if reviewer_id not in environment_reviewer_ids(environment): - blockers.append( - "Production environment does not require the configured reviewer." 
- ) + blockers.append(f"{label} does not require the configured reviewer.") return blockers + + +def prod_environment_verification_blockers( + environment: Mapping[str, Any] | None, reviewer_id: int +) -> list[str]: + """Return blockers when the prod environment does not match expectations.""" + return protected_environment_verification_blockers( + environment, + reviewer_id, + label="Production environment", + ) + + +def operations_alert_reconcile_environment_verification_blockers( + environment: Mapping[str, Any] | None, reviewer_id: int +) -> list[str]: + """Return blockers when the operations-alert reconcile environment is weak.""" + return protected_environment_verification_blockers( + environment, + reviewer_id, + label="Operations alert reconcile environment", + ) diff --git a/scripts/configure_github_repository_controls.py b/scripts/configure_github_repository_controls.py index 31e2e33..b6b940f 100644 --- a/scripts/configure_github_repository_controls.py +++ b/scripts/configure_github_repository_controls.py @@ -7,15 +7,30 @@ import json import subprocess # nosec B404 import sys -from collections.abc import Mapping, Sequence +from collections.abc import Callable, Mapping, Sequence from typing import Any import _github_repository_controls as _repository_controls REQUIRED_STATUS_CHECKS = _repository_controls.REQUIRED_STATUS_CHECKS +OPERATIONS_ALERT_RECONCILE_ENVIRONMENT = ( + _repository_controls.OPERATIONS_ALERT_RECONCILE_ENVIRONMENT +) +operations_alert_reconcile_environment_payload = ( + _repository_controls.operations_alert_reconcile_environment_payload +) +protected_reviewer_environment_payload = ( + _repository_controls.protected_reviewer_environment_payload +) prod_environment_payload = _repository_controls.prod_environment_payload ruleset_payload = _repository_controls.ruleset_payload _environment_reviewer_ids = _repository_controls.environment_reviewer_ids +_operations_alert_reconcile_environment_verification_blockers = ( + 
_repository_controls.operations_alert_reconcile_environment_verification_blockers +) +_protected_environment_verification_blockers = ( + _repository_controls.protected_environment_verification_blockers +) _prod_environment_verification_blockers = ( _repository_controls.prod_environment_verification_blockers ) @@ -30,12 +45,17 @@ __all__ = ( "REQUIRED_STATUS_CHECKS", + "OPERATIONS_ALERT_RECONCILE_ENVIRONMENT", "build_parser", "configure", "main", + "operations_alert_reconcile_environment_payload", "prod_environment_payload", + "protected_reviewer_environment_payload", "ruleset_payload", "_environment_reviewer_ids", + "_operations_alert_reconcile_environment_verification_blockers", + "_protected_environment_verification_blockers", "_prod_environment_verification_blockers", "_required_status_check_items", "_required_status_contexts", @@ -108,24 +128,45 @@ def _repo_admin_allowed(repo: str) -> bool: return isinstance(permissions, Mapping) and permissions.get("admin") is True +def _environment_verification_blockers( + repo: str, + *, + environment_name: str, + reviewer_id: int, + blocker_fn: Callable[[Mapping[str, Any] | None, int], list[str]], +) -> list[str]: + """Return verification blockers for one protected GitHub environment.""" + try: + environment_payload = _run_gh_api( + [f"repos/{repo}/environments/{environment_name}"] + ) + except RuntimeError as exc: + return [f"{environment_name} environment was not readable: {exc}."] + environment = ( + environment_payload if isinstance(environment_payload, Mapping) else None + ) + return blocker_fn(environment, reviewer_id) + + def _verify_applied_controls(repo: str, reviewer_id: int) -> dict[str, Any]: """Fetch and verify repository controls after an admin apply.""" ruleset = _main_ruleset(repo) - try: - environment_payload = _run_gh_api([f"repos/{repo}/environments/prod"]) - except RuntimeError as exc: - environment = None - environment_blockers = [f"Production environment was not readable: {exc}."] - else: - 
environment = ( - environment_payload if isinstance(environment_payload, Mapping) else None - ) - environment_blockers = _prod_environment_verification_blockers( - environment, reviewer_id - ) + prod_environment_blockers = _environment_verification_blockers( + repo, + environment_name="prod", + reviewer_id=reviewer_id, + blocker_fn=_prod_environment_verification_blockers, + ) + reconcile_environment_blockers = _environment_verification_blockers( + repo, + environment_name=OPERATIONS_ALERT_RECONCILE_ENVIRONMENT, + reviewer_id=reviewer_id, + blocker_fn=_operations_alert_reconcile_environment_verification_blockers, + ) blockers = [ *_ruleset_verification_blockers(ruleset), - *environment_blockers, + *prod_environment_blockers, + *reconcile_environment_blockers, ] if blockers: raise RuntimeError(" ".join(blockers)) @@ -133,6 +174,8 @@ def _verify_applied_controls(repo: str, reviewer_id: int) -> dict[str, Any]: "requiredStatusChecks": sorted(_required_status_contexts(ruleset or {})), "prodReviewerId": reviewer_id, "prodEnvironment": "prod", + "operationsAlertReconcileReviewerId": reviewer_id, + "operationsAlertReconcileEnvironment": OPERATIONS_ALERT_RECONCILE_ENVIRONMENT, } @@ -165,6 +208,9 @@ def configure( payloads: dict[str, Any] = {"ruleset": ruleset_payload(existing_rules)} if apply: payloads["prodEnvironment"] = prod_environment_payload(reviewer_id) + payloads["operationsAlertReconcileEnvironment"] = ( + operations_alert_reconcile_environment_payload(reviewer_id) + ) if existing and isinstance(existing.get("id"), int): _run_gh_api( [f"repos/{repo}/rulesets/{existing['id']}", "--method", "PUT"], @@ -179,10 +225,22 @@ def configure( [f"repos/{repo}/environments/prod", "--method", "PUT"], input_payload=payloads["prodEnvironment"], ) + _run_gh_api( + [ + f"repos/{repo}/environments/{OPERATIONS_ALERT_RECONCILE_ENVIRONMENT}", + "--method", + "PUT", + ], + input_payload=payloads["operationsAlertReconcileEnvironment"], + ) payloads["verification"] = 
_verify_applied_controls(repo, reviewer_id) else: payloads["prodEnvironment"] = prod_environment_payload(reviewer_id) + payloads["operationsAlertReconcileEnvironment"] = ( + operations_alert_reconcile_environment_payload(reviewer_id) + ) payloads["prodEnvironmentReviewerLogin"] = reviewer + payloads["operationsAlertReconcileEnvironmentReviewerLogin"] = reviewer print(json.dumps(payloads, indent=2, sort_keys=True)) @@ -190,7 +248,9 @@ def configure( def build_parser() -> argparse.ArgumentParser: """Build the CLI parser.""" parser = argparse.ArgumentParser( - description="Configure GitHub branch and production environment controls." + description=( + "Configure GitHub branch rules and protected environment controls." + ) ) parser.add_argument("--repo", required=True, help="Repository in owner/name form.") parser.add_argument( @@ -207,12 +267,15 @@ def build_parser() -> argparse.ArgumentParser: mode.add_argument( "--dry-run", action="store_true", - help="Print the ruleset and prod environment payloads without applying.", + help=("Print the ruleset and protected environment payloads without applying."), ) mode.add_argument( "--verify-only", action="store_true", - help="Verify existing ruleset and prod environment controls without applying.", + help=( + "Verify existing ruleset and protected environment controls " + "without applying." 
+ ), ) return parser diff --git a/scripts/operations_alert_triage.py b/scripts/operations_alert_triage.py new file mode 100644 index 0000000..81a8b46 --- /dev/null +++ b/scripts/operations_alert_triage.py @@ -0,0 +1,269 @@ +#!/usr/bin/env python3 +"""Render sanitized operations-alert issues and stable duplicate fingerprints.""" + +from __future__ import annotations + +import argparse +import hashlib +import json +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Any + + +@dataclass(frozen=True) +class IssueContext: + """Non-secret metadata shared by the rendered GitHub issue body.""" + + queue_name: str + account_id: str + region: str + fingerprint: str + + +def load_json(value: object) -> dict[str, Any]: + """Return a JSON object from a nested SNS/SQS string field.""" + if isinstance(value, dict): + return {str(key): item for key, item in value.items()} + if not isinstance(value, str) or not value: + return {} + try: + loaded = json.loads(value) + except json.JSONDecodeError: + return {} + if isinstance(loaded, dict): + return {str(key): item for key, item in loaded.items()} + return {} + + +def safe_value(value: object) -> str: + """Return one sanitized metadata field for GitHub issue text.""" + if value in (None, ""): + return "unknown" + return str(value).replace("`", "'").replace("\n", " ")[:200] + + +def event_from_message( + message: dict[str, Any], +) -> tuple[dict[str, Any], dict[str, Any]]: + """Extract SNS and EventBridge metadata from one SQS message.""" + sns = load_json(message.get("Body")) + return sns, load_json(sns.get("Message")) + + +def alert_messages(alerts: dict[str, Any]) -> list[dict[str, Any]]: + """Return only well-formed SQS message objects from an alert batch.""" + messages = alerts.get("Messages") + if not isinstance(messages, list): + return [] + return [message for message in messages if isinstance(message, dict)] + + +def message_fields(message: dict[str, Any]) -> dict[str, object]: + 
"""Return sanitized field names and values for one alert occurrence.""" + sns, event = event_from_message(message) + attributes = message.get("Attributes") + attributes = attributes if isinstance(attributes, dict) else {} + return { + "sqsMessageId": message.get("MessageId"), + "snsMessageId": sns.get("MessageId"), + "sentTimestamp": attributes.get("SentTimestamp"), + "eventSource": event.get("source"), + "detailType": event.get("detail-type"), + "eventTime": event.get("time") or sns.get("Timestamp"), + } + + +_VOLATILE_DETAIL_KEYS = frozenset( + { + "backupJobId", + "copyJobId", + "eventID", + "eventId", + "eventTime", + "id", + "recoveryPointArn", + "requestID", + "requestId", + "restoreJobId", + "time", + "x-amz-request-id", + "xAmzRequestId", + } +) + + +def stable_detail(value: object) -> object: + """Return EventBridge detail content without per-occurrence metadata.""" + if isinstance(value, dict): + return { + str(key): stable_detail(item) + for key, item in sorted(value.items()) + if str(key) not in _VOLATILE_DETAIL_KEYS + } + if isinstance(value, list): + return [stable_detail(item) for item in value] + if isinstance(value, str | int | float | bool) or value is None: + return value + return safe_value(value) + + +def canonical_json(value: object) -> str: + """Return deterministic compact JSON for fingerprint material.""" + return json.dumps(value, sort_keys=True, separators=(",", ":"), default=str) + + +def fingerprint_parts(message: dict[str, Any]) -> tuple[str, ...]: + """Return stable non-secret fields that identify one alert stream.""" + _, event = event_from_message(message) + detail = event.get("detail") + detail = detail if isinstance(detail, dict) else {} + return tuple( + safe_value(value) + for value in ( + event.get("source"), + event.get("detail-type"), + detail.get("state"), + detail.get("backupVaultName"), + detail.get("backupPlanId"), + detail.get("backupRuleId"), + detail.get("resourceArn"), + canonical_json(stable_detail(detail)), + 
canonical_json(stable_detail(event.get("resources"))), + ) + ) + + +def message_fingerprint(message: dict[str, Any]) -> str: + """Return the stable fingerprint for one alert stream.""" + digest_input = "|".join(fingerprint_parts(message)).encode("utf-8") + return hashlib.sha256(digest_input).hexdigest()[:24] + + +def alert_groups(alerts: dict[str, Any]) -> list[tuple[str, dict[str, Any]]]: + """Return alert messages grouped by stable per-stream fingerprint.""" + groups: dict[str, list[dict[str, Any]]] = {} + for message in alert_messages(alerts): + groups.setdefault(message_fingerprint(message), []).append(message) + return [ + (fingerprint, {"Messages": messages}) + for fingerprint, messages in sorted(groups.items()) + ] + + +def grouped_alerts_payload(alerts: dict[str, Any]) -> dict[str, object]: + """Return a JSON-serializable grouped alert manifest.""" + return { + "groups": [ + { + "fingerprint": fingerprint, + "messageCount": len(group["Messages"]), + "alerts": group, + } + for fingerprint, group in alert_groups(alerts) + ] + } + + +def alerts_fingerprint(alerts: dict[str, Any]) -> str: + """Return a stable aggregate fingerprint for the current alert batch.""" + messages = alert_messages(alerts) + if not messages: + return "empty" + unique_parts = sorted({message_fingerprint(message) for message in messages}) + if len(unique_parts) == 1: + return unique_parts[0] + digest_input = "\n".join(unique_parts).encode("utf-8") + return hashlib.sha256(digest_input).hexdigest()[:24] + + +def render_issue_body( + alerts: dict[str, Any], + context: IssueContext, +) -> str: + """Render a sanitized GitHub issue body for alert triage.""" + messages = alert_messages(alerts) + lines = [ + f"", + f"The operations alert queue contains {len(messages)} message(s).", + "", + ( + "This issue intentionally records sanitized metadata only. 
Use the " + "message IDs, event source, detail type, event time, CloudTrail, and " + "linked runbooks for investigation; do not paste raw alert payloads, " + "stack exports, credentials, tokens, or private incident notes." + ), + "", + f"Queue: {safe_value(context.queue_name)}", + f"AWS account: {safe_value(context.account_id)}", + f"AWS region: {safe_value(context.region)}", + "", + "Messages:", + ] + for message in messages: + rendered = ", ".join( + f"{name}: `{safe_value(value)}`" + for name, value in message_fields(message).items() + ) + lines.append(f"- {rendered}") + lines.append("") + return "\n".join(lines) + + +def _build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description="Render sanitized operations alert issue content." + ) + parser.add_argument("--alerts-json", required=True) + parser.add_argument("--queue-name", required=True) + parser.add_argument("--account-id", required=True) + parser.add_argument("--region", required=True) + parser.add_argument("--body-file", required=True) + parser.add_argument("--fingerprint-file", required=True) + parser.add_argument("--groups-file") + return parser + + +def main(argv: list[str] | None = None) -> int: + args = _build_parser().parse_args(argv) + try: + with Path(args.alerts_json).open(encoding="utf-8") as alerts_file: + loaded_alerts = json.load(alerts_file) + except (OSError, json.JSONDecodeError, TypeError) as exc: + print( + f"error: {args.alerts_json} must contain a JSON object: {exc}", + file=sys.stderr, + ) + return 1 + if not isinstance(loaded_alerts, dict): + print( + f"error: {args.alerts_json} must contain a JSON object.", + file=sys.stderr, + ) + return 1 + alerts = {str(key): item for key, item in loaded_alerts.items()} + fingerprint = alerts_fingerprint(alerts) + if args.groups_file: + Path(args.groups_file).write_text( + f"{json.dumps(grouped_alerts_payload(alerts), sort_keys=True)}\n", + encoding="utf-8", + ) + Path(args.body_file).write_text( + 
render_issue_body( + alerts, + IssueContext( + queue_name=args.queue_name, + account_id=args.account_id, + region=args.region, + fingerprint=fingerprint, + ), + ), + encoding="utf-8", + ) + Path(args.fingerprint_file).write_text(f"{fingerprint}\n", encoding="utf-8") + return 0 + + +if __name__ == "__main__": # pragma: no cover + raise SystemExit(main()) diff --git a/scripts/render_well_architected_closeout.py b/scripts/render_well_architected_closeout.py index 9821a40..31dfc89 100644 --- a/scripts/render_well_architected_closeout.py +++ b/scripts/render_well_architected_closeout.py @@ -600,8 +600,9 @@ def render_closeout_bundle( f"`GITHUB_REPOSITORY_CONTROLS_REPO={repo} " "GITHUB_REPOSITORY_CONTROLS_MODE=--verify-only " "make configure-github-repository-controls`.", - "- Confirm `main` requires all PR checks and the `prod` environment " - "requires an independent reviewer.", + "- Confirm `main` requires all PR checks and the `prod` plus " + "`operations-alert-reconcile` environments require an independent " + "reviewer.", "- After real owner evidence JSON paths exist, set the non-secret " "repository Actions variables for hosted evidence runs. 
Leave any " "variable unset until its owner evidence exists.", diff --git a/scripts/run_pulumi_command.py b/scripts/run_pulumi_command.py index 16524f7..6c6f8b1 100644 --- a/scripts/run_pulumi_command.py +++ b/scripts/run_pulumi_command.py @@ -428,17 +428,13 @@ def _run_with_observable_output( ) -def _plan_decrypt_fallback_enabled(context: CommandContext) -> bool: - return context.env.get("GITHUB_ACTIONS") == "true" and bool( - context.env.get("PULUMI_EXPECTED_SHA") - ) - - -def _saved_prod_plan_recovery_enabled( +def _ci_saved_plan_recovery_enabled( context: CommandContext, combined_output: str, error_signature: str ) -> bool: - return error_signature in combined_output and _plan_decrypt_fallback_enabled( - context + return ( + error_signature in combined_output + and context.env.get("GITHUB_ACTIONS") == "true" + and bool(context.env.get("PULUMI_EXPECTED_SHA")) ) @@ -449,7 +445,7 @@ def _recover_failed_saved_prod_plan( result: subprocess.CompletedProcess[str], ) -> int | None: combined_output = f"{result.stdout or ''}{result.stderr or ''}" - if _saved_prod_plan_recovery_enabled(context, combined_output, PLAN_DECRYPT_ERROR): + if _ci_saved_plan_recovery_enabled(context, combined_output, PLAN_DECRYPT_ERROR): print( "error: saved Pulumi plan failed with the known KMS plan-decrypt " "error; refusing direct production apply because production must " @@ -458,7 +454,7 @@ def _recover_failed_saved_prod_plan( ) return result.returncode or 1 - if _saved_prod_plan_recovery_enabled(context, combined_output, STACK_LOCK_ERROR): + if _ci_saved_plan_recovery_enabled(context, combined_output, STACK_LOCK_ERROR): print( "warning: Pulumi reported a stack lock while applying the saved " "production plan; running pulumi cancel for the selected stack " @@ -512,11 +508,14 @@ def _pulumi_cancel_command(context: CommandContext, stack: str) -> list[str]: def _run_up_stack( context: CommandContext, stack: str, *, include_policy_pack: bool = True ) -> int | None: - if not 
_plan_decrypt_fallback_enabled(context): - _run_stack_command( - context, StackCommand("up", stack, include_policy_pack=include_policy_pack) + if context.env.get("GITHUB_ACTIONS") == "true": + print( + "error: direct Pulumi up is disabled in GitHub Actions; " + "generate and apply a reviewed saved plan with pulumi-plan and " + "pulumi-up-plan.", + file=sys.stderr, ) - return None + return 1 result = _run_with_observable_output( context, @@ -527,19 +526,6 @@ def _run_up_stack( if result.returncode == 0: return None - combined_output = f"{result.stdout or ''}{result.stderr or ''}" - if STACK_LOCK_ERROR in combined_output: - print( - "warning: Pulumi reported a stack lock during guarded direct apply; " - "running pulumi cancel for the selected stack and retrying once.", - file=sys.stderr, - ) - context.runner(_pulumi_cancel_command(context, stack), env=context.env) - _run_stack_command( - context, StackCommand("up", stack, include_policy_pack=include_policy_pack) - ) - return None - return result.returncode or 1 diff --git a/scripts/validate_ci_environment.py b/scripts/validate_ci_environment.py new file mode 100644 index 0000000..c7207f9 --- /dev/null +++ b/scripts/validate_ci_environment.py @@ -0,0 +1,177 @@ +#!/usr/bin/env python3 +"""Validate AWS Secrets Manager-derived CI configuration without printing values.""" + +from __future__ import annotations + +import argparse +import os +import re +from collections.abc import Mapping +from dataclasses import dataclass + +AWS_ACCOUNT_ID_PATTERN = re.compile(r"^\d{12}$") +AWS_REGION_PATTERN = re.compile(r"^[a-z]{2}-[a-z]+-\d+$") +AWS_ROLE_ARN_PATTERN = re.compile(r"^arn:aws:iam::\d{12}:role/[A-Za-z0-9+=,.@_/-]+$") +SNS_TOPIC_ARN_PATTERN = re.compile(r"^arn:aws:sns:[a-z0-9-]+:\d{12}:[A-Za-z0-9_.-]+$") + + +@dataclass(frozen=True) +class ValidationIssue: + """One non-secret validation failure.""" + + name: str + message: str + + +def parse_required_keys(raw_value: str) -> tuple[str, ...]: + """Parse a comma-separated key 
list from the composite action input.""" + return tuple(key.strip() for key in raw_value.split(",") if key.strip()) + + +def missing_or_blank(keys: tuple[str, ...], environ: Mapping[str, str]) -> list[str]: + """Return required environment variable names that are unset or blank.""" + return [key for key in keys if not environ.get(key, "").strip()] + + +def validate_environment( + keys: tuple[str, ...], + environ: Mapping[str, str], +) -> list[ValidationIssue]: + """Return non-secret validation failures for AWS Secrets Manager-derived values.""" + issues = [ + ValidationIssue(key, "is required") for key in missing_or_blank(keys, environ) + ] + if issues: + return issues + + validators = { + "AWS_ACCOUNT_ID": _validate_account_id, + "AWS_REGION": _validate_region, + "AWS_PREVIEW_ROLE_ARN": _validate_role_arn, + "AWS_APPLY_ROLE_ARN": _validate_role_arn, + "AWS_DRIFT_ROLE_ARN": _validate_role_arn, + "AWS_OPERATIONS_ALERT_TRIAGE_ROLE_ARN": _validate_role_arn, + "PULUMI_BACKEND_URL": _validate_backend_url, + "PULUMI_SECRETS_PROVIDER": _validate_secrets_provider, + "PULUMI_PREVIEW_STACKS": _validate_stack_list, + "PULUMI_DRIFT_STACKS": _validate_stack_list, + "OPERATIONS_TOPIC_ARN": _validate_sns_topic_arn, + "OPERATIONS_ALERT_QUEUE_NAME": _validate_resource_name, + "OPERATIONS_CLOUDTRAIL_NAME": _validate_resource_name, + } + for key in keys: + validator = validators.get(key) + if validator is None: + continue + message = validator(environ[key].strip()) + if message: + issues.append(ValidationIssue(key, message)) + return issues + + +def _validate_account_id(value: str) -> str | None: + if AWS_ACCOUNT_ID_PATTERN.fullmatch(value): + return None + return "must be a 12-digit AWS account ID" + + +def _validate_region(value: str) -> str | None: + if AWS_REGION_PATTERN.fullmatch(value): + return None + return "must be an AWS region code" + + +def _validate_role_arn(value: str) -> str | None: + if AWS_ROLE_ARN_PATTERN.fullmatch(value): + return None + return "must be an IAM role 
ARN" + + +def _validate_backend_url(value: str) -> str | None: + if value.startswith("s3://"): + return None + return "must use an s3:// Pulumi backend" + + +def _validate_secrets_provider(value: str) -> str | None: + if value.startswith("awskms://"): + return None + return "must use an awskms:// Pulumi secrets provider" + + +def _validate_stack_list(value: str) -> str | None: + stacks = [stack.strip() for stack in value.split(",") if stack.strip()] + if stacks and all(re.fullmatch(r"[A-Za-z0-9_.:-]+", stack) for stack in stacks): + return None + return "must be a comma-separated list of stack names" + + +def _validate_sns_topic_arn(value: str) -> str | None: + if SNS_TOPIC_ARN_PATTERN.fullmatch(value): + return None + return "must be an SNS topic ARN" + + +def _validate_resource_name(value: str) -> str | None: + if re.fullmatch(r"[A-Za-z0-9_.-]{1,256}", value): + return None + return "must be a metadata-only AWS resource name" + + +def write_github_environment( + environ: Mapping[str, str], + output_path: str | None, +) -> None: + """Persist derived environment values for later GitHub Actions steps.""" + if not output_path: + return + aws_region = _github_env_value("AWS_REGION", environ.get("AWS_REGION", "")) + with open(output_path, "a", encoding="utf-8") as github_env: + if aws_region: + github_env.write(f"AWS_DEFAULT_REGION={aws_region}\n") + + +def _github_env_value(name: str, value: str) -> str: + """Return a single-line value safe for GitHub environment files.""" + stripped = value.strip() + if "\n" in stripped or "\r" in stripped: + raise ValueError(f"{name} must not contain newline characters") + return stripped + + +def _build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description="Validate CI configuration injected from AWS Secrets Manager." 
+ ) + parser.add_argument("--purpose", required=True) + parser.add_argument("--required-keys", required=True) + return parser + + +def main(argv: list[str] | None = None) -> int: + args = _build_parser().parse_args(argv) + required_keys = parse_required_keys(args.required_keys) + if not required_keys: + print("error: required-keys must include at least one environment variable.") + return 1 + + issues = validate_environment(required_keys, os.environ) + if issues: + for issue in issues: + print(f"error: {issue.name} {issue.message}.") + return 1 + + try: + write_github_environment(os.environ, os.environ.get("GITHUB_ENV")) + except ValueError as exc: + print(f"error: {exc}.") + return 1 + print( + "Validated AWS Secrets Manager-derived CI configuration " + f"for {args.purpose} using a fixed CI secret." + ) + return 0 + + +if __name__ == "__main__": # pragma: no cover + raise SystemExit(main()) diff --git a/specs/issue-18-multi-account-test-prod-pulumi-environments/epics.md b/specs/issue-18-multi-account-test-prod-pulumi-environments/epics.md index 81c7071..9fd3284 100644 --- a/specs/issue-18-multi-account-test-prod-pulumi-environments/epics.md +++ b/specs/issue-18-multi-account-test-prod-pulumi-environments/epics.md @@ -1,5 +1,9 @@ # Epics and Stories: Multi-Account Pulumi Environments +> Superseded by issue 20 for privileged CI configuration. Fixed AWS Secrets +> Manager CI configuration secrets now carry account configuration; protected +> GitHub `prod` remains the approval boundary. + ## Epic 1: Stack And Discovery Contracts ### Story 1.1: Add non-secret test and prod stack configs @@ -22,11 +26,11 @@ As an SRE, I want CI stack discovery to avoid example files so shared backend jo ## Epic 2: GitHub Actions Deployment Paths ### Story 2.1: Refactor PR guardrails for the test environment -As a maintainer, I want trusted PR guardrails to use the `test` environment and fork PRs to stay unprivileged. 
+As a maintainer, I want trusted PR guardrails to use fixed test PR account configuration and fork PRs to stay unprivileged. **Acceptance Criteria:** -- Given a trusted same-repo PR runs guardrails, Then privileged jobs use `environment: test`. -- Given a privileged PR guardrail job runs, Then it reads environment-scoped variables. +- Given a trusted same-repo PR runs guardrails, Then privileged jobs use fixed test PR account configuration. +- Given a privileged PR guardrail job runs, Then it reads account-scoped variables. - Given OIDC credentials are configured, Then the workflow uses `AWS_PREVIEW_ROLE_ARN` and `allowed-account-ids`. - Given a fork PR runs guardrails, Then it runs unprivileged preview and IAM input extraction. - Given privileged config is missing for a same-repo run, Then the workflow fails before preview. @@ -37,7 +41,7 @@ As an SRE, I want `main` merges to deploy to the test account only after preview **Acceptance Criteria:** - Given code is pushed to `main`, Then the test deploy workflow runs. - Given a maintainer manually dispatches the workflow, Then the test deploy workflow runs. -- Given the workflow runs, Then it uses `environment: test`. +- Given the workflow runs, Then it uses fixed test account configuration. - Given preview completes, Then the same preview artifact feeds destructive-diff and IAM validation. - Given apply runs, Then it uses `AWS_APPLY_ROLE_ARN`. - Given post-apply drift runs, Then it uses `AWS_DRIFT_ROLE_ARN`. @@ -47,7 +51,7 @@ As a release approver, I want production apply to require a reviewed preview and **Acceptance Criteria:** - Given a maintainer dispatches production, Then the workflow accepts a commit SHA. -- Given production preview runs, Then the preview job uses `environment: prod-preview`. +- Given production preview runs, Then the preview job uses fixed production preview account configuration. - Given production apply runs, Then the apply job uses `environment: prod`. 
- Given apply starts, Then it verifies the approved SHA matches the preview SHA. - Given a Pulumi plan artifact exists, Then apply uses that saved plan. @@ -58,13 +62,13 @@ As an SRE, I want drift detection to run separately for test and prod with expli **Acceptance Criteria:** - Given nightly guardrails run, Then the workflow has a `test` drift job. - Given nightly guardrails run, Then the workflow has a `prod-preview` drift job. -- Given either drift job runs, Then it uses environment-scoped variables. +- Given either drift job runs, Then it uses account-scoped variables. - Given required variables are missing, Then the drift job fails. ## Epic 3: Documentation And Auditability -### Story 3.1: Document GitHub environment variables and protection -As a repository administrator, I want setup docs for the `test`, `prod-preview`, and `prod` environments. +### Story 3.1: Document privileged CI variables and production protection +As a repository administrator, I want setup docs for the test, production preview, and protected production paths. **Acceptance Criteria:** - Given setup docs are read, Then `docs/github-actions-secrets.md` documents environment variables and optional secrets. diff --git a/specs/issue-18-multi-account-test-prod-pulumi-environments/prd.md b/specs/issue-18-multi-account-test-prod-pulumi-environments/prd.md index 227e4b2..2035712 100644 --- a/specs/issue-18-multi-account-test-prod-pulumi-environments/prd.md +++ b/specs/issue-18-multi-account-test-prod-pulumi-environments/prd.md @@ -1,30 +1,35 @@ # PRD: Multi-Account Pulumi Environments +> Superseded by issue 20 for privileged CI configuration. GitHub +> non-production environments from this plan have been replaced by fixed AWS +> Secrets Manager CI configuration secrets; protected GitHub `prod` remains the +> approval boundary. 
+ ## Executive Summary -This change makes the bootstrap infrastructure repository deployable across separate AWS test and production accounts with GitHub environment-scoped configuration, OIDC-only credentials, S3 Pulumi backends, and AWS KMS Pulumi secrets providers. The primary users are maintainers and SREs who need auditable preview, apply, drift, and smoke-validation paths without sharing state or credentials between environments. +This change makes the bootstrap infrastructure repository deployable across separate AWS test and production accounts with account-scoped CI configuration, OIDC-only credentials, S3 Pulumi backends, and AWS KMS Pulumi secrets providers. The primary users are maintainers and SREs who need auditable preview, apply, drift, and smoke-validation paths without sharing state or credentials between environments. ## Success Criteria | ID | Criterion | Measurement | | --- | --- | --- | -| SC-1 | Pull requests from trusted same-repo branches run a real test-account Pulumi preview. | `pulumi-pr-guardrails.yml` binds privileged preview and IAM validation to the `test` GitHub environment and fails fast when required environment variables are absent. | +| SC-1 | Pull requests from trusted same-repo branches run a real test-account Pulumi preview. | `pulumi-pr-guardrails.yml` binds privileged preview and IAM validation to the fixed test PR account configuration and fails fast when required variables are absent. | | SC-2 | Pull requests from forks never receive AWS credentials. | The PR guardrail workflow selects an unprivileged path for fork PRs in a job without a GitHub environment or `id-token: write`. | -| SC-3 | Merges to `main` deploy only to the test account before any production path. | A merge workflow uses the `test` environment, validates preview artifacts, applies the `test` stack, and runs post-apply drift. | -| SC-4 | Production apply requires a successful test deploy, production preview, and GitHub environment approval. 
| A production workflow verifies a successful `Pulumi Test Deploy` run for the same SHA, generates a `prod-preview` artifact, then gates `prod` apply behind the `prod` environment and commit SHA verification. | -| SC-5 | Nightly drift validates both target accounts. | Nightly guardrails run separate `test` and `prod-preview` environment jobs with account-specific roles and stack lists. | +| SC-3 | Merges to `main` deploy only to the test account before any production path. | A merge workflow uses the fixed test account configuration, validates preview artifacts, applies the `test` stack, and runs post-apply drift. | +| SC-4 | Production apply requires a successful test deploy, production preview, and GitHub environment approval. | A production workflow verifies a successful `Pulumi Test Deploy` run for the same SHA, generates a production preview artifact, then gates `prod` apply behind protected `prod` approval and commit SHA verification. | +| SC-5 | Nightly drift validates both target accounts. | Nightly guardrails run separate test and production preview account jobs with account-specific roles and stack lists. | | SC-6 | Stack configs are committed without secrets or legacy passphrase metadata. | `pulumi/Pulumi.test.yaml` and `pulumi/Pulumi.prod.yaml` contain only non-secret config and KMS initialization comments. | ## Product Scope MVP scope: - Committed non-secret `test` and `prod` Pulumi stack config. -- Environment-scoped GitHub Actions variables for account, region, role, backend, stack, and KMS provider values. +- Account-scoped CI variables for account, region, role, backend, stack, and KMS provider values. - Explicit PR preview, test deploy, production preview/apply, and nightly drift workflows. - Docs and structural tests for the environment model. Growth scope: - Tighter generated least-privilege AWS role policies from Pulumi outputs. -- Automated GitHub environment variable drift reports. +- Automated privileged CI configuration drift reports. 
Out of scope: - Adding static AWS access keys. @@ -47,7 +52,7 @@ Infrastructure automation changes must use least privilege, short-lived credenti ## Innovation Analysis -The design uses GitHub environments as the configuration and approval boundary instead of repository-wide variables. This makes account, role, backend, and stack selection explicit in every privileged job and lets production apply inherit GitHub environment protection controls. +The original design used GitHub environments as the configuration and approval boundary instead of repository-wide variables. Issue 20 supersedes that model by keeping account, role, backend, and stack selection in AWS Secrets Manager JSON values read by fixed GitHub OIDC roles at runtime, while preserving protected GitHub `prod` approval controls. ## Project-Type Requirements @@ -57,20 +62,20 @@ This is developer infrastructure automation. Workflows must be scriptable, non-i | ID | Requirement | Test Criteria | | --- | --- | --- | -| FR-1 | Trusted PRs can run a real Pulumi preview against the `test` stack. | Workflow uses `environment: test`, `AWS_PREVIEW_ROLE_ARN`, `PULUMI_BACKEND_URL`, `PULUMI_SECRETS_PROVIDER`, and `PULUMI_PREVIEW_STACKS`. | +| FR-1 | Trusted PRs can run a real Pulumi preview against the `test` stack. | Workflow uses fixed test PR account configuration with `AWS_PREVIEW_ROLE_ARN`, `PULUMI_BACKEND_URL`, `PULUMI_SECRETS_PROVIDER`, and `PULUMI_PREVIEW_STACKS`. | | FR-2 | IAM validation uses AWS credentials only for trusted privileged jobs. | Workflow configures OIDC after trust/missing-config checks and runs `make test-iam-validation` only in privileged mode. | | FR-3 | Fork PRs run without AWS credentials. | Workflow checks fork source in a credential-free job and runs unprivileged preview/IAM paths without a GitHub environment or OIDC token permission. | -| FR-4 | Merges to `main` can apply only the `test` stack. 
| Test deploy workflow uses `environment: test`, test apply role, test stack variables, and post-apply drift. | -| FR-5 | Production preview uses read-only production access. | Production workflow preview job uses `environment: prod-preview` and `AWS_PREVIEW_ROLE_ARN`. | -| FR-6 | Production apply requires a successful test deployment, the approved `prod` environment, and reviewed commit SHA. | Production preview verifies a successful `Pulumi Test Deploy` run for the SHA; production apply uses `environment: prod`, validates SHA equality, and applies the saved plan. | +| FR-4 | Merges to `main` can apply only the `test` stack. | Test deploy workflow uses fixed test account configuration, test apply role, test stack variables, and post-apply drift. | +| FR-5 | Production preview uses read-only production access. | Production workflow preview job uses fixed production preview account configuration and `AWS_PREVIEW_ROLE_ARN`. | +| FR-6 | Production apply requires a successful test deployment, protected `prod` approval, and reviewed commit SHA. | Production preview verifies a successful `Pulumi Test Deploy` run for the SHA; production apply uses protected GitHub `prod`, validates SHA equality, and applies the saved plan. | | FR-7 | Privileged logs show account, stack, environment, role, and guardrail mode without secrets. | Workflows emit sanitized evidence lines and do not call secret-revealing Pulumi or AWS commands. | -| FR-8 | Nightly drift checks run for `test` and `prod`. | Nightly guardrail workflow contains separate jobs bound to `test` and `prod-preview`. | +| FR-8 | Nightly drift checks run for `test` and `prod`. | Nightly guardrail workflow contains separate jobs bound to fixed test and production preview account configuration. | | FR-9 | Stack discovery excludes example stacks unless explicitly configured. | Helper discovery ignores the exact template file `Pulumi.example.yaml`. 
| ## Non-Functional Requirements - The system shall use OIDC-only AWS credentials for privileged GitHub jobs as measured by absence of static AWS key secrets in workflows. - The system shall use AWS KMS Pulumi secrets providers for stack initialization as measured by workflow/script calls containing `--secrets-provider "$PULUMI_SECRETS_PROVIDER"`. -- The system shall fail privileged same-repo guardrails when required environment-scoped variables are missing as measured by workflow prerequisite checks. +- The system shall fail privileged same-repo guardrails when required account-scoped variables are missing as measured by workflow prerequisite checks. - The system shall retain preview evidence for no more than 14 days as measured by upload-artifact retention settings. - The system shall avoid secret-revealing commands as measured by workflow and script tests checking for forbidden flags. diff --git a/specs/issue-18-multi-account-test-prod-pulumi-environments/well-architected-review.md b/specs/issue-18-multi-account-test-prod-pulumi-environments/well-architected-review.md index 2e4b4d6..8fabb9b 100644 --- a/specs/issue-18-multi-account-test-prod-pulumi-environments/well-architected-review.md +++ b/specs/issue-18-multi-account-test-prod-pulumi-environments/well-architected-review.md @@ -50,7 +50,7 @@ Overall score: ## Remaining Improvements -1. Move CI configuration to Pulumi ESC and minimize GitHub Environment +1. Move CI configuration to AWS Secrets Manager and minimize GitHub Environment variables. Tracked in issue #20. 2. Add a saved-plan manifest that records stack name, backend URL, commit SHA, and SHA-256 for each plan, then verify it before `pulumi up --plan`. 
diff --git a/specs/issue-20-pulumi-esc-ci-config/architecture.md b/specs/issue-20-pulumi-esc-ci-config/architecture.md new file mode 100644 index 0000000..d22f36d --- /dev/null +++ b/specs/issue-20-pulumi-esc-ci-config/architecture.md @@ -0,0 +1,129 @@ +# Architecture: Issue 20 AWS Secrets Manager CI Configuration + +## Control Boundaries + +AWS Secrets Manager owns the privileged account-local configuration. GitHub +Actions uses GitHub OIDC to assume one `GitHubCiConfigRead-*` role per fixed CI +suffix, reads one AWS Secrets Manager JSON secret, and exports selected keys as +workflow environment variables. Pulumi Cloud and Pulumi ESC are not used. GitHub +`prod` remains the only deployment environment because it adds human approval +and branch restrictions for production apply. + +```text +GitHub workflow + -> GitHub repository variables select the config-read role ARN and region + -> GitHub OIDC assumes the fixed GitHubCiConfigRead-* role + -> AWS Secrets Manager loader reads the fixed CI secret suffix + -> scripts/validate_ci_environment.py validates exported variables + -> aws-actions/configure-aws-credentials assumes purpose-specific role + -> Make/Pulumi command runs with sanitized evidence +``` + +Fork pull requests stay on the existing unprivileged artifact path and do not +request OIDC. + +## AWS CI Config Contract + +Each fixed suffix maps to one AWS Secrets Manager JSON secret: + +| AWS Secrets Manager CI secret suffix | AWS Secrets Manager secret ID | +| --- | --- | +| `test-pr` | `/bootstrap-infrastructure/ci/test-pr` | +| `test` | `/bootstrap-infrastructure/ci/test` | +| `prod-preview` | `/bootstrap-infrastructure/ci/prod-preview` | +| `prod` | `/bootstrap-infrastructure/ci/prod` | + +The Pulumi `test` stack creates the `test-pr` and `test` AWS Secrets Manager +secret containers plus matching `GitHubCiConfigRead-*` roles. The Pulumi `prod` +stack creates the `prod-preview` and `prod` containers plus matching production +read roles. 
Pulumi does not own secret versions or secret values; operators +populate and rotate the JSON payloads directly in AWS Secrets Manager after the +containers exist. The `githubCiConfigReadRoleArns` stack output gives operators +the role ARNs to store as GitHub repository variables. + +Common AWS CI config variables: + +- `AWS_ACCOUNT_ID` +- `AWS_REGION` +- `PULUMI_BACKEND_URL` +- `PULUMI_SECRETS_PROVIDER` + +Purpose-specific AWS CI variables: + +- `AWS_PREVIEW_ROLE_ARN` +- `AWS_APPLY_ROLE_ARN` +- `AWS_DRIFT_ROLE_ARN` +- `AWS_OPERATIONS_ALERT_TRIAGE_ROLE_ARN` +- `OPERATIONS_ALERT_QUEUE_NAME` +- `OPERATIONS_TOPIC_ARN` +- `OPERATIONS_CLOUDTRAIL_NAME` +- `PULUMI_PREVIEW_STACKS` +- `PULUMI_DRIFT_STACKS` + +Pulumi stack config may include only non-account-local static configuration. +Do not use Pulumi config to store AWS account IDs, role ARNs, backend URLs, +stack lists, or secrets-provider URIs. Shared CI stacks still initialize or +migrate with `--secrets-provider "$PULUMI_SECRETS_PROVIDER"`, and the provider +must be `awskms://`. + +## AWS Trust Model + +Non-production automation roles trust: + +- `repo:VilnaCRM-Org/bootstrap-infrastructure:ref:refs/heads/main` +- `repo:VilnaCRM-Org/bootstrap-infrastructure:pull_request` + +Production apply roles trust only: + +- `repo:VilnaCRM-Org/bootstrap-infrastructure:environment:prod` + +Trust conditions also bind +`token.actions.githubusercontent.com:job_workflow_ref` to the workflow files +that need the role. The operations alert triage role trusts only +`operations-alert-triage.yml@refs/heads/main`. + +## Operations Alert Dedupe + +`scripts/operations_alert_triage.py` renders sanitized issue bodies and a stable +fingerprint marker: + +```text + +``` + +The fingerprint uses durable alert identity fields such as source, detail type, +state, backup vault, backup plan, backup rule, resource ARN, stable +EventBridge detail, and resources. 
It deliberately ignores occurrence IDs such +as SQS message ID, SNS message ID, backup job ID, request ID, and event time. +The workflow splits mixed SQS batches into one GitHub issue update per stable +alert stream, then deletes queue messages only after every issue creation or +comment creation succeeds. + +Legacy alert issues created before this marker was introduced will not be +auto-deduped. The first post-merge run creates or updates a canonical +fingerprinted issue. Maintainers can then link and close older duplicates after +confirming the sanitized AWS Backup events share the same underlying stream, or +they can edit one chosen issue body to include the computed marker from retained +raw payloads. + +The manual Operations Alert Legacy Reconcile workflow gives SREs a GitOps-owned +cleanup path after confirmation. It requires a canonical fingerprinted issue, +requires an HTTPS SRE confirmation reference, rejects already-fingerprinted +legacy issues, and closes confirmed legacy issues with +`gh issue close --duplicate-of`. + +## Manual Secure Steps + +- Apply the Pulumi `test` and `prod` stacks so AWS contains the four Secrets + Manager containers and AWS CI config read roles. +- Populate the four AWS Secrets Manager JSON values in the owning AWS accounts. +- Confirm each of the four AWS Secrets Manager CI secrets is readable by its matching + config-read role with `aws secretsmanager get-secret-value`. +- Configure GitHub-to-AWS OIDC for this repository and the GitHub OIDC trust for each + AWS Secrets Manager read role. +- Apply the Pulumi trust-policy update in each AWS account through the normal + stack process. +- Keep protected GitHub `prod` reviewers and deployment branch restrictions in + place. +- Verify test-account AWS metadata with local AWS CLI credentials and + production metadata with AWS MCP/read-only access before enabling apply. 
diff --git a/specs/issue-20-pulumi-esc-ci-config/current-closeout-evidence-2026-05-25.md b/specs/issue-20-pulumi-esc-ci-config/current-closeout-evidence-2026-05-25.md new file mode 100644 index 0000000..dae37fb --- /dev/null +++ b/specs/issue-20-pulumi-esc-ci-config/current-closeout-evidence-2026-05-25.md @@ -0,0 +1,235 @@ +# Current Closeout Evidence: Issue 20 AWS Secrets Manager CI Configuration + +Recorded on 2026-05-25 in the `Europe/Sofia` timezone for branch +`codex/issue20-pulumi-esc`. + +## Audited PR Head + +| Field | Value | +| --- | --- | +| PR | `https://github.com/VilnaCRM-Org/bootstrap-infrastructure/pull/57` | +| Latest implementation code head SHA | `1470d289e910765f7a772806a31819dd428ca717` | +| Latest implementation code short SHA | `1470d28` | +| Source of truth | AWS Secrets Manager remains the source of truth for account-local CI values; Pulumi Cloud and Pulumi ESC are not used for CI configuration. | +| Secret handling | No secret values, `GetSecretValue` responses, decrypted stack outputs, access keys, or tokens were read or recorded. | + +## GitHub State + +Open repository issues at the time of this audit: + +| Issue | State | Current disposition | +| --- | --- | --- | +| `#20` | Open | GitOps implementation is present in PR `#57`; live closeout still needs external AWS CI config/AWS setup and successful privileged checks. | +| `#49` | Open | Legacy unmarked operations-alert issue; do not close until a canonical fingerprinted issue exists and SRE confirms it is the same alert stream with a sanitized HTTPS confirmation reference. | +| `#50` | Open | Legacy unmarked operations-alert issue; do not close until a canonical fingerprinted issue exists and SRE confirms it is the same alert stream with a sanitized HTTPS confirmation reference. | +| `#52` | Open | Legacy unmarked operations-alert issue; do not close until a canonical fingerprinted issue exists and SRE confirms it is the same alert stream with a sanitized HTTPS confirmation reference. 
| +| `#53` | Open | Legacy unmarked operations-alert issue; do not close until a canonical fingerprinted issue exists and SRE confirms it is the same alert stream with a sanitized HTTPS confirmation reference. | +| `#54` | Open | Legacy unmarked operations-alert issue; do not close until a canonical fingerprinted issue exists and SRE confirms it is the same alert stream with a sanitized HTTPS confirmation reference. | +| `#55` | Open | Legacy unmarked operations-alert issue; do not close until a canonical fingerprinted issue exists and SRE confirms it is the same alert stream with a sanitized HTTPS confirmation reference. | +| `#56` | Open | Legacy unmarked operations-alert issue; do not close until a canonical fingerprinted issue exists and SRE confirms it is the same alert stream with a sanitized HTTPS confirmation reference. | +| `#58` | Open | Legacy unmarked operations-alert issue; do not close until a canonical fingerprinted issue exists and SRE confirms it is the same alert stream with a sanitized HTTPS confirmation reference. | + +Current PR `#57` review state is approved, but merge state is still blocked by +external AWS setup. The latest audited remote checks have all repo-owned jobs +green except the two expected privileged AWS setup checks: + +- `Preview` +- `Test Account Evidence` + +The prior Pulumi Cloud-era privileged checks failed before AWS-only loading was +implemented: + +```text +Invalid response from token exchange 400: Bad Request (invalid_request: invalid organization vilnacrm-org) +``` + +The AWS-only setup removes that Pulumi Cloud token exchange path. Remaining live +setup work is limited to valid AWS credentials, AWS Secrets Manager payloads, +and AWS OIDC role trust. Earlier AWS-only remote failures stopped before AWS +credentials were requested because the repository variables were still empty: + +```text +config-role-arn must be an AWS IAM role ARN. 
+``` + +The six non-secret GitHub repository variables were configured on 2026-05-25 +with deterministic `GitHubCiConfigRead-*` role ARNs and account regions. After +rerunning the failed jobs, both privileged checks advanced to AWS OIDC and now +fail at test-account role assumption: + +```text +Could not assume role with OIDC: Not authorized to perform sts:AssumeRoleWithWebIdentity +``` + +That current failure proves GitHub can resolve the AWS-only metadata variables, +but the test account still needs the reviewed Pulumi stack apply or equivalent +GitOps-controlled IAM trust update before the runner can read the test +Secrets Manager CI configuration. + +The latest implementation code head additionally enforces that direct +`pulumi up` is disabled whenever `GITHUB_ACTIONS=true`; GitHub apply paths must +generate and apply a reviewed saved plan with `pulumi-plan` and +`pulumi-up-plan`. + +The same head also removes the silent pull-request fallback from +`AWS_TEST_PR_CI_CONFIG_ROLE_ARN` to `AWS_TEST_CI_CONFIG_ROLE_ARN`. Pull request +jobs now select the exact `test-pr` CI configuration in a shell step and fail +fast if the PR config-read role variable is missing. 
+ +Local validation on this implementation head passed: + +- `uv run pytest tests/unit/test_script_entrypoints.py::test_run_up_plan_stack_rejects_plan_decrypt_without_direct_apply tests/unit/test_script_entrypoints.py::test_run_up_plan_stack_recovers_from_saved_plan_lock tests/unit/test_script_entrypoints.py::test_run_up_stack_does_not_retry_after_lock tests/unit/test_script_entrypoints.py::test_run_pulumi_command_unhandled_apply_failures_return_status -q` +- `uv run pytest tests/unit/test_script_entrypoints.py -k "run_up_plan_stack or run_up_stack or run_pulumi_command_unhandled_apply_failures_return_status or dispatch_propagates_apply_failures" -q` +- `uv run pytest tests/unit/test_script_entrypoints.py::test_run_up_stack_does_not_retry_after_lock tests/unit/test_script_entrypoints.py::test_run_up_stack_rejects_direct_apply_in_github_actions tests/unit/test_script_entrypoints.py::test_run_pulumi_command_unhandled_apply_failures_return_status tests/unit/test_script_entrypoints.py::test_run_pulumi_command_dispatch_propagates_apply_failures -q` +- `uv run pytest tests/pulumi/test_delivery_contracts.py::test_docker_compose_keeps_workspace_and_credentials_contract tests/pulumi/test_delivery_contracts.py::test_aws_ci_loader_reads_secrets_manager_without_pulumi_cloud tests/pulumi/test_delivery_contracts.py::test_multi_account_workflows_use_fixed_aws_ci_config_contracts tests/pulumi/test_delivery_contracts.py::test_multi_account_environment_docs_are_explicit -q` +- `uv run pytest tests/pulumi/test_delivery_contracts.py::test_makefile_keeps_pulumi_guardrails_secret_safe tests/pulumi/test_project_structure.py::test_issue20_cutover_manual_is_secret_safe_and_actionable -q` +- `uv run pytest tests/pulumi/test_project_structure.py::test_ci_guardrails_manual_follow_up_completes_aws_ci_cutover tests/pulumi/test_project_structure.py::test_issue20_cutover_manual_is_secret_safe_and_actionable 
tests/pulumi/test_project_structure.py::test_issue20_closeout_evidence_tracks_external_manual_steps -q` +- `uv run pytest tests/pulumi/test_ci_guardrails.py::test_well_architected_evidence_workflow_uploads_enforced_reports tests/pulumi/test_delivery_contracts.py::test_multi_account_workflows_use_fixed_aws_ci_config_contracts tests/unit/test_components.py::test_ci_configuration_manages_aws_secret_containers_and_github_read_roles tests/unit/test_mutation_targets.py::test_mutation_target_ci_config_validation_and_lookup_helpers -q` +- `uv run pytest tests/pulumi/test_ci_guardrails.py::test_preview_guardrail_workflow_requires_preview_diff_and_iam_jobs tests/pulumi/test_ci_guardrails.py::test_well_architected_evidence_workflow_uploads_enforced_reports tests/pulumi/test_delivery_contracts.py::test_multi_account_workflows_use_fixed_aws_ci_config_contracts -q` +- `uv run pytest tests/unit/test_validate_ci_environment.py -q` +- `uv run ruff check pulumi/infra/ci_config.py tests/pulumi/test_ci_guardrails.py tests/pulumi/test_delivery_contracts.py tests/unit/test_components.py tests/unit/test_mutation_targets.py scripts/validate_ci_environment.py tests/unit/test_validate_ci_environment.py` +- `uv run ruff check scripts/run_pulumi_command.py tests/unit/test_script_entrypoints.py tests/conftest.py tests/pulumi/test_delivery_contracts.py` +- `uv run ruff check scripts/run_pulumi_command.py tests/unit/test_script_entrypoints.py tests/pulumi/test_delivery_contracts.py` +- `make test-actionlint` +- `make test-yaml` +- `make test-secrets` +- `qlty check` +- `git diff --check` + +## AWS Metadata Checks + +### Test Account + +Local AWS CLI checks for the test account could not prove live state because +the current Codex process still inherits stale `AWS_ACCESS_KEY_ID` and +`AWS_SECRET_ACCESS_KEY` values from its parent environment. `aws configure list` +therefore reports credential sources as `env` and region `eu-central-1` from +`~/.aws/config` in this running session. 
The values were not recorded. + +```text +aws configure list +access_key: env +secret_key: env +region: eu-central-1 +``` + +The stale shell startup exports were removed from `~/.bashrc`; backup: +`/home/kravtsov/.bashrc.codex-backup-20260525171555`. When those inherited +environment variables are explicitly unset for a command, AWS CLI uses the +shared credentials file and authenticates to production account `933245420672`, +not the test account `891377212104`: + +```text +env -u AWS_ACCESS_KEY_ID -u AWS_SECRET_ACCESS_KEY -u AWS_SESSION_TOKEN aws sts get-caller-identity --output json +Account: 933245420672 +Arn: arn:aws:iam::933245420672:user/codex +``` + +No test-account profile is configured locally yet. No test-account issue should +be closed from this workstation until a maintainer configures a dedicated test +profile and reruns metadata-only verification against account `891377212104`. + +### Production Account + +AWS MCP metadata checks succeeded for account `933245420672` with caller +`arn:aws:iam::933245420672:user/codex`. 
+ +The production operations queue exists: + +| Field | Value | +| --- | --- | +| Queue name | `bootstrap-prod-operations-alerts` | +| Queue ARN | `arn:aws:sqs:eu-central-1:933245420672:bootstrap-prod-operations-alerts` | +| Region | `eu-central-1` | +| Visible messages | `1` | +| Not-visible messages | `0` | +| Delayed messages | `0` | +| SSE | `SqsManagedSseEnabled=true` | + +The production AWS CI config backing resources for this PR are not present yet: + +| Resource | Metadata-only result | +| --- | --- | +| `/bootstrap-infrastructure/ci/prod-preview` | `ResourceNotFoundException` from `secretsmanager:DescribeSecret` | +| `/bootstrap-infrastructure/ci/prod` | `ResourceNotFoundException` from `secretsmanager:DescribeSecret` | +| `GitHubCiConfigRead-bootstrap-infrastructure-prod-preview` | `NoSuchEntityException` from `iam:GetRole` | +| `GitHubCiConfigRead-bootstrap-infrastructure-prod` | `NoSuchEntityException` from `iam:GetRole` | + +That absence is expected before the reviewed Pulumi `prod` stack has been +applied. It also proves production privileged checks cannot be treated as +complete yet. + +## Issue 20 Acceptance Status + +| Requirement | Current evidence | Status | +| --- | --- | --- | +| Fixed privileged AWS Secrets Manager CI secrets | Workflows call `.github/actions/load-aws-ci-env` with fixed suffixes such as `test-pr`, `test`, `prod-preview`, and `prod`; same-repo PR Well-Architected evidence now uses `test-pr` instead of the main-branch `test` trust path. | GitOps implemented | +| AWS Secrets Manager source of truth | Pulumi creates secret containers and GitHub OIDC read roles; docs/tests state values stay in AWS Secrets Manager and must not be copied into Pulumi config, GitHub variables, workflow logs, or docs. | GitOps implemented | +| No GitHub `test` or `prod-preview` deployment environments for non-approval jobs | Workflow contracts and tests enforce only protected production apply uses `environment: prod`.
| GitOps implemented | +| Production approval preserved | Protected GitHub `prod` Environment remains the production apply approval boundary. | GitOps implemented; repository-admin verification still required | +| Protected manual reconcile gate | The repository controls helper now prints, applies, and verifies both `prod` and `operations-alert-reconcile`; the manual reconcile workflow requires `operations-alert-reconcile` and has no AWS/OIDC permission. | GitOps implemented; repository-admin verification still required | +| Fork PR isolation | Fork paths stay unprivileged and do not open AWS CI config or request AWS credentials. | GitOps implemented | +| No PR role fallback | Same-repo pull request jobs use `test-pr` and fail fast when `AWS_TEST_PR_CI_CONFIG_ROLE_ARN` is missing instead of falling back to `AWS_TEST_CI_CONFIG_ROLE_ARN`. | GitOps implemented | +| KMS-backed Pulumi secrets provider | Validators require `awskms://` for shared CI stack configuration. | GitOps implemented | +| Live AWS secret load and AWS role assumption | Privileged checks require the AWS-only GitHub variables, read roles, and Secrets Manager payloads. | Manual secure setup required | +| AWS Secrets Manager payloads populated | Pulumi intentionally does not manage `SecretVersion` resources or JSON values. | Manual secure setup required | +| Legacy GitHub Environment variable cleanup | Cleanup workflow is present and confirmation-gated. | Run manually only after AWS Secrets Manager-backed privileged CI is green | + +## Legacy Operations Alert Issues + +Issues `#49`, `#50`, `#52` through `#56`, and `#58` all reference +`bootstrap-test-operations-alerts` in account `891377212104`, +region `eu-central-1`, and AWS Backup `Backup Job State Change` events. None +of those issue bodies contains an `operations-alert:fingerprint=` marker. 
+ +Live `main` does not yet contain the fingerprint-aware triage workflow or the +manual reconcile workflow from PR `#57`, and the live repository does not yet +have an `operations-alert-reconcile` protected Environment. Do not close those +issues automatically. The safe GitOps path is: + +1. Merge and run the fingerprint-aware operations alert triage workflow, or + recover a computed fingerprint from retained raw payloads without exposing + payload contents. +2. Establish one canonical issue whose body contains + `operations-alert:fingerprint=`. +3. Have SRE confirm the legacy issues match the same underlying alert stream, + including state, vault, plan or rule, and protected resource. Retain a + sanitized HTTPS confirmation reference without raw alert payloads, + credentials, stack exports, tokens, or private incident notes. +4. Run **Operations Alert Legacy Reconcile** with the canonical issue and the + confirmed legacy issue list plus the SRE confirmation reference. + +Closure is safe only after SRE confirms the legacy issue list against the +canonical fingerprinted issue and records the sanitized confirmation reference. + +## Manual Secure Steps Still Required + +1. Apply the reviewed Pulumi `test` and `prod` stacks so AWS creates the four + Secrets Manager containers and `GitHubCiConfigRead-*` roles. +2. Populate the four AWS Secrets Manager JSON values in the owning AWS accounts. +3. Keep the six non-secret GitHub repository variables aligned with the + `GitHubCiConfigRead-*` role ARNs and account regions. They are currently set + to the deterministic role names expected from this branch, but successful CI + still requires the AWS roles and trust policies to exist. +4. Refresh local test-account AWS CLI credentials and rerun metadata-only + verification. +5. 
Have a repository administrator run + `GITHUB_REPOSITORY_CONTROLS_MODE=--apply make configure-github-repository-controls` + and then `GITHUB_REPOSITORY_CONTROLS_MODE=--verify-only make configure-github-repository-controls` + so GitHub has both protected `prod` and `operations-alert-reconcile` + Environments. +6. Rerun privileged PR checks and confirm `Preview` and `Test Account Evidence` + pass on the current head. +7. Run GitHub Environment legacy variable cleanup only after AWS Secrets Manager-backed + privileged CI is green, then delete the temporary cleanup token. +8. Close `#20` only after the successful run and reviewer acceptance of the AWS + Secrets Manager source-of-truth refinement. +9. Close `#49`, `#50`, `#52` through `#56`, and `#58` only through the manual legacy + reconcile workflow after SRE confirmation, including the required + `sre_confirmation_reference`. + +## BMAD/BMALPH Notes + +Canonical planning artifacts remain under `specs/issue-20-pulumi-esc-ci-config/`. +Generated BMAD/BMALPH/Ralph framework state remains intentionally uncommitted +per `AGENTS.md`. diff --git a/specs/issue-20-pulumi-esc-ci-config/epics.md b/specs/issue-20-pulumi-esc-ci-config/epics.md new file mode 100644 index 0000000..67fdeda --- /dev/null +++ b/specs/issue-20-pulumi-esc-ci-config/epics.md @@ -0,0 +1,46 @@ +# Epics: Issue 20 AWS Secrets Manager CI Configuration + +## Epic 1: AWS CI Config Loading and Validation + +- Add a local composite action that authenticates to AWS through GitHub OIDC, + reads a fixed AWS Secrets Manager CI secret, exports environment variables, + and exposes safe outputs for workflow `with:` blocks. +- Add a Python validator for required keys, account ID shape, AWS region shape, + role ARN shape, S3 backend URLs, AWS KMS secrets-provider URLs, stack-list + shape, SNS topic ARNs, and resource names. +- Cover validator behavior with focused unit tests. 
+ +## Epic 2: Workflow Migration + +- Update PR guardrails, test deploy, production deploy, PR command runner, + nightly guardrails, operations alert triage, and Well-Architected evidence to + load AWS Secrets Manager CI config instead of GitHub Environment variables. +- Keep GitHub `environment: prod` only on production apply jobs. +- Remove privileged workflow dependencies on account-local GitHub variables + such as `vars.PULUMI_BACKEND_URL`, `vars.PULUMI_SECRETS_PROVIDER`, and + role/account variables loaded from AWS Secrets Manager. Keep only the minimal + repository variables that identify the config-read role ARNs and regions, and + remove `secrets.PULUMI_ACCESS_TOKEN`. + +## Epic 3: AWS Trust Policy + +- Update Pulumi-generated IAM trust policies to use fixed branch and pull + request subjects for non-approval jobs. +- Keep the GitHub environment subject only for `prod`. +- Bind workflow refs to the expected workflow files. +- Scope operations alert triage to its dedicated workflow and protected branch. + +## Epic 4: Operations Alert Hygiene + +- Render sanitized issue bodies from SNS/SQS-wrapped EventBridge messages. +- Generate stable alert fingerprints that ignore occurrence IDs. +- Search for an existing open issue by fingerprint before creating a new issue. +- Delete SQS messages only after the GitHub issue create/comment operation + succeeds. + +## Epic 5: Documentation and Evidence + +- Update CI, SRE, security, and alert-routing docs to describe the AWS Secrets + Manager-backed CI config contract. +- Record BMAD/BMALPH planning artifacts under `specs/`. +- Call out manual setup and validation steps in the PR and final report. 
diff --git a/specs/issue-20-pulumi-esc-ci-config/implementation-readiness-report.md b/specs/issue-20-pulumi-esc-ci-config/implementation-readiness-report.md new file mode 100644 index 0000000..87a9ace --- /dev/null +++ b/specs/issue-20-pulumi-esc-ci-config/implementation-readiness-report.md @@ -0,0 +1,62 @@ +# Implementation Readiness Report: Issue 20 AWS Secrets Manager CI Configuration + +## Status + +Ready for targeted validation after local lint, workflow lint, unit tests, and +cloud metadata checks pass. + +## Completed Design Decisions + +- Fixed AWS Secrets Manager CI secret names are committed in workflows; user-controlled event + payloads cannot select an AWS Secrets Manager CI secret. +- AWS Secrets Manager is the source of truth for account-local CI values; the + workflow loader reads those JSON secrets directly through GitHub OIDC. +- Pulumi Cloud and Pulumi ESC are not used for CI configuration. +- Pulumi manages the AWS Secrets Manager secret containers and + `GitHubCiConfigRead-*` roles, but not the JSON secret values. +- GitHub `prod` remains the only deployment environment because it provides + human production approval. +- AWS role trust uses repository ref, pull request, protected production + environment, and workflow-ref conditions. +- AWS CI config validation happens before AWS credentials are requested. +- Operations alert dedupe uses a stable issue fingerprint and preserves the SQS + message until GitHub write success. + +## Validation Plan + +- `uv run ruff check` over changed scripts and tests. +- `uv run pytest` over AWS CI config validator, operations alert triage, component trust, + and Pulumi workflow-contract tests. +- `make test-actionlint` and `make test-yaml`. +- Test account metadata-only AWS CLI checks for caller identity, EventBridge, + SNS, SQS, and AWS Backup alert context. +- Production account metadata-only AWS MCP checks for caller identity and + Pulumi bootstrap role metadata. 
+ +## Known External Dependencies + +- AWS Secrets Manager JSON values must be populated outside this PR after the + Pulumi-managed secret containers exist. +- AWS Secrets Manager CI secrets and `GitHubCiConfigRead-*` roles must exist in + the owning AWS accounts. +- GitHub OIDC and GitHub-to-AWS OIDC trust must be enabled without moving + account-local values out of AWS Secrets Manager. +- GitHub `prod` Environment reviewer and branch restrictions require repository + admin rights. +- AWS account trust-policy changes require applying the Pulumi stack through the + existing GitOps process. + +The current PR and AWS metadata audit is retained in +`current-closeout-evidence-2026-05-25.md`; it records the failing privileged +checks, invalid local test-account AWS token, and missing production Secrets +Manager containers/read roles as external closeout dependencies. + +## Residual Risks + +- Existing open PRs may need to be rebased or rerun after this trust-model + change lands. +- Historical operations alert duplicate issues must be closed through + **Operations Alert Legacy Reconcile** after the canonical fingerprint behavior + is merged and SRE records a sanitized HTTPS confirmation reference. +- If local test-account AWS credentials are expired, metadata verification is + blocked until the maintainer refreshes them. diff --git a/specs/issue-20-pulumi-esc-ci-config/prd.md b/specs/issue-20-pulumi-esc-ci-config/prd.md new file mode 100644 index 0000000..d043de1 --- /dev/null +++ b/specs/issue-20-pulumi-esc-ci-config/prd.md @@ -0,0 +1,75 @@ +# PRD: Issue 20 AWS Secrets Manager CI Configuration + +## Problem + +Privileged CI workflows previously read AWS account IDs, role ARNs, Pulumi +backend URLs, secrets-provider URIs, and stack lists from GitHub Environment +variables. That made non-approval environments (`test`, `prod-preview`) double +as both account configuration and trust boundaries, and it left manual +configuration drift outside GitOps review.
+ +## Goals + +- Move privileged CI account configuration into fixed AWS Secrets Manager JSON + secrets. +- Manage the AWS Secrets Manager secret containers and `GitHubCiConfigRead-*` + roles through Pulumi while leaving secret JSON values human-populated in AWS + Secrets Manager. +- Keep the protected GitHub `prod` Environment only as a production approval + boundary. +- Authenticate to AWS with GitHub OIDC; do not introduce long-lived AWS keys or + Pulumi access tokens. +- Validate AWS Secrets Manager-loaded configuration before deployment + credentials are requested. +- Update AWS OIDC trust to fixed repository subjects and workflow refs, with a + GitHub environment subject only for production apply. +- Deduplicate operations alert issues created from repeated AWS Backup failure + notifications. +- Document every manual setup step that cannot be performed safely from GitOps. + +## Non-Goals + +- Applying production infrastructure changes from this feature branch. +- Migrating Pulumi state secrets away from the existing AWS KMS provider. +- Replacing GitHub branch protection or production reviewer controls. + +## Required AWS CI Config Secrets + +| CI suffix | AWS Secrets Manager secret ID | Purpose | +| --- | --- | --- | +| `test-pr` | `/bootstrap-infrastructure/ci/test-pr` | Trusted PR preview, IAM validation, and PR evidence collection | +| `test` | `/bootstrap-infrastructure/ci/test` | Test apply, drift, operations triage, and evidence | +| `prod-preview` | `/bootstrap-infrastructure/ci/prod-preview` | Production preview, IAM validation, and drift | +| `prod` | `/bootstrap-infrastructure/ci/prod` | Production apply after protected GitHub approval | + +Workflow call sites pass only fixed suffixes such as `test-pr` or `prod`. +Account-local values remain AWS Secrets Manager-owned and are loaded by the +local AWS CI action through GitHub OIDC.
+ +## Acceptance Criteria + +- Privileged workflows load one fixed AWS Secrets Manager CI secret through a + local composite action and never derive the suffix from PR/comment payloads. +- Workflows keep only the minimal repository variables needed to locate the + account-local config-read roles and regions; account-local AWS values move to + AWS Secrets Manager, and workflows do not use GitHub `test` or `prod-preview` + deployment environments or `secrets.PULUMI_ACCESS_TOKEN`. +- Production apply jobs are the only privileged jobs bound to GitHub + `environment: prod`. +- Pulumi component tests prove non-production automation roles do not trust + `environment:test` and production roles still trust `environment:prod`. +- Operations alert triage comments on an existing open canonical issue when a + stable alert fingerprint already exists. +- Operator documentation describes AWS Secrets Manager-backed CI keys, OIDC + trust, stack migration, and manual secure setup steps. +- Pulumi outputs expose the AWS Secrets Manager container IDs and + `GitHubCiConfigRead-*` role ARNs needed to configure GitHub repository + variables. + +## BMAD/BMALPH Notes + +`bmalph doctor` and `bmalph status` were run before implementation and reported +that this repository is not initialized for BMad/BMALPH. `bmalph init --dry-run` +was used to inspect generated state paths. Per repository guidance, this PR +commits the planning artifacts under `specs/` and does not commit `_bmad/`, +`.ralph/`, or generated framework state. diff --git a/specs/pr-comment-pulumi-promotion/architecture.md b/specs/pr-comment-pulumi-promotion/architecture.md index f036729..33b94d6 100644 --- a/specs/pr-comment-pulumi-promotion/architecture.md +++ b/specs/pr-comment-pulumi-promotion/architecture.md @@ -21,9 +21,9 @@ - The issue-comment workflow does not check out PR code. - AWS credentials exist only in the trusted runner jobs bound to GitHub environments. 
-- Each OIDC job uses the environment role configured for its stage: preview, - apply, or drift, with apply/drift optionally falling back to the preview role - when dedicated role variables are not set. +- Each OIDC job uses the explicit account-local AWS Secrets Manager value for + its stage: preview, apply, or drift. Missing role values fail before AWS + credentials are requested. - Production jobs are impossible unless the same workflow run has already completed test apply and test post-apply drift successfully for that PR head SHA. diff --git a/tests/conftest.py b/tests/conftest.py index 8bade39..39165c9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -41,6 +41,7 @@ def _apply_mock_resource_defaults( "aws:ecr/repository:Repository": _mock_ecr_repository, "aws:kms/key:Key": _mock_kms_key, "aws:kms/alias:Alias": _mock_kms_alias, + "aws:secretsmanager/secret:Secret": _mock_secretsmanager_secret, "aws:backup/vault:Vault": _mock_backup_vault, "aws:sns/topic:Topic": _mock_sns_topic, "aws:sns/topicSubscription:TopicSubscription": _mock_sns_topic_subscription, @@ -81,11 +82,14 @@ def _mock_iam_policy(name: str, inputs: dict[str, Any], state: dict[str, Any]) - def _mock_oidc_provider( - _name: str, _inputs: dict[str, Any], state: dict[str, Any] + _name: str, inputs: dict[str, Any], state: dict[str, Any] ) -> None: + provider_url = str( + inputs.get("url") or "https://token.actions.githubusercontent.com" + ).removeprefix("https://") state.setdefault( "arn", - "arn:aws:iam::123456789012:oidc-provider/token.actions.githubusercontent.com", + f"arn:aws:iam::123456789012:oidc-provider/{provider_url}", ) @@ -111,6 +115,17 @@ def _mock_kms_alias(name: str, inputs: dict[str, Any], state: dict[str, Any]) -> state.setdefault("arn", f"arn:aws:kms:us-east-1:123456789012:{alias_name}") +def _mock_secretsmanager_secret( + name: str, inputs: dict[str, Any], state: dict[str, Any] +) -> None: + secret_name = inputs.get("name") or name + state.setdefault("name", secret_name) + 
state.setdefault( + "arn", + f"arn:aws:secretsmanager:us-east-1:123456789012:secret:{secret_name}-mock", + ) + + def _mock_backup_vault( name: str, inputs: dict[str, Any], state: dict[str, Any] ) -> None: @@ -316,8 +331,6 @@ def pulumi_automation_environment(tmp_path_factory: pytest.TempPathFactory) -> N os.environ.setdefault("PULUMI_PYTHON_CMD", python_cmd) backend_url = os.environ.get("PULUMI_BACKEND_URL", "") - if os.environ.get("PULUMI_ACCESS_TOKEN"): - return if backend_url: return @@ -326,9 +339,11 @@ def pulumi_automation_environment(tmp_path_factory: pytest.TempPathFactory) -> N env = os.environ.copy() env["PULUMI_HOME"] = str(backend_dir) + env.pop("PULUMI_ACCESS_TOKEN", None) subprocess.run(["pulumi", "login", backend_uri], check=True, env=env, timeout=30) + os.environ.pop("PULUMI_ACCESS_TOKEN", None) os.environ.setdefault("PULUMI_HOME", str(backend_dir)) os.environ.setdefault("PULUMI_BACKEND_URL", backend_uri) @@ -343,8 +358,6 @@ def ensure_pulumi_cli() -> None: @pytest.fixture(scope="session") def ensure_pulumi_secrets_provider() -> None: """Require an explicit non-passphrase secrets provider for automation tests.""" - if os.environ.get("PULUMI_ACCESS_TOKEN"): - return if not os.environ.get("PULUMI_SECRETS_PROVIDER"): pytest.skip( "Set PULUMI_SECRETS_PROVIDER to run Pulumi automation tests without " diff --git a/tests/pulumi/test_ci_guardrails.py b/tests/pulumi/test_ci_guardrails.py index 50cea9e..09e0a28 100644 --- a/tests/pulumi/test_ci_guardrails.py +++ b/tests/pulumi/test_ci_guardrails.py @@ -141,16 +141,27 @@ def test_preview_guardrail_workflow_requires_preview_diff_and_iam_jobs() -> None "", ) destructive_diff_job_if = " ".join(jobs["destructive_diff"]["if"].split()) - pr_backend_expression = ( - "${{ github.event_name == 'pull_request' && " - + "vars.PULUMI_PR_BACKEND_URL || vars.PULUMI_BACKEND_URL || " - + "vars.PULUMI_PR_BACKEND_URL }}" + preview_ci_config_step = next( + step + for step in jobs["preview"]["steps"] + if step.get("uses") == 
"./.github/actions/load-aws-ci-env" + ) + iam_ci_config_step = next( + step + for step in jobs["iam_validation"]["steps"] + if step.get("uses") == "./.github/actions/load-aws-ci-env" + ) + preview_ci_config_target_step = next( + step + for step in jobs["preview"]["steps"] + if step.get("name") == "Select test AWS CI configuration" ) - pr_stack_expression = ( - "${{ github.event_name == 'pull_request' && " - + "vars.PULUMI_PR_PREVIEW_STACKS || vars.PULUMI_PREVIEW_STACKS || " - + "vars.PULUMI_PR_PREVIEW_STACKS }}" + iam_ci_config_target_step = next( + step + for step in jobs["iam_validation"]["steps"] + if step.get("name") == "Select test AWS CI configuration" ) + pr_ci_environment = "${{ steps.ci_config_target.outputs.environment }}" assert workflow["concurrency"]["cancel-in-progress"] is True assert "environment" not in jobs["preview_mode"] # nosec B101 @@ -160,13 +171,21 @@ def test_preview_guardrail_workflow_requires_preview_diff_and_iam_jobs() -> None == "${{ needs.preview_mode.outputs.privileged == 'true' }}" ) assert jobs["preview"]["needs"] == ["preview_mode"] # nosec B101 - assert jobs["preview"]["environment"] == "test" # nosec B101 - assert ( # nosec B101 - jobs["preview"]["env"]["PULUMI_BACKEND_URL"] == pr_backend_expression - ) - assert ( # nosec B101 - jobs["preview"]["env"]["PULUMI_PREVIEW_STACKS"] == pr_stack_expression - ) + assert "environment" not in jobs["preview"] # nosec B101 + assert preview_ci_config_step["with"]["environment"] == pr_ci_environment # nosec B101 + assert iam_ci_config_step["with"]["environment"] == pr_ci_environment # nosec B101 + assert preview_ci_config_step["with"]["config-role-arn"] == ( # nosec B101 + "${{ steps.ci_config_target.outputs.config-role-arn }}" + ) + assert iam_ci_config_step["with"]["config-role-arn"] == ( # nosec B101 + "${{ steps.ci_config_target.outputs.config-role-arn }}" + ) + for target_step in (preview_ci_config_target_step, iam_ci_config_target_step): + assert "AWS_TEST_PR_CI_CONFIG_ROLE_ARN" in 
target_step["run"] # nosec B101 + assert "AWS_TEST_CI_CONFIG_ROLE_ARN" in target_step["run"] # nosec B101 + assert "must be set" in target_step["run"] # nosec B101 + assert "PULUMI_BACKEND_URL" in preview_ci_config_step["with"]["required-keys"] # nosec B101 + assert "PULUMI_PREVIEW_STACKS" in preview_ci_config_step["with"]["required-keys"] # nosec B101 assert jobs["preview"]["permissions"] == { # nosec B101 "contents": "read", "id-token": "write", @@ -180,6 +199,7 @@ def test_preview_guardrail_workflow_requires_preview_diff_and_iam_jobs() -> None == "${{ needs.preview_unprivileged.result == 'success' }}" ) assert "environment" not in jobs["iam_validation_unprivileged"] # nosec B101 + assert "environment" not in jobs["iam_validation"] # nosec B101 assert jobs["iam_validation_unprivileged"]["permissions"] == {"contents": "read"} # nosec B101 assert destructive_diff_job_if == destructive_diff_if # nosec B101 assert jobs["destructive_diff"]["needs"] == [ # nosec B101 @@ -204,16 +224,30 @@ def test_preview_guardrail_workflow_requires_preview_diff_and_iam_jobs() -> None assert preview_upload_step["with"]["name"] == "pulumi-preview" # nosec B101 assert "if" not in preview_oidc_step # nosec B101 assert ( # nosec B101 - preview_oidc_step["with"]["role-to-assume"] == "${{ env.AWS_PREVIEW_ROLE_ARN }}" + preview_oidc_step["with"]["role-to-assume"] + == "${{ steps.ci_config.outputs.aws-preview-role-arn }}" ) assert ( # nosec B101 - preview_oidc_step["with"]["allowed-account-ids"] == "${{ env.AWS_ACCOUNT_ID }}" + preview_oidc_step["with"]["allowed-account-ids"] + == "${{ steps.ci_config.outputs.aws-account-id }}" + ) + assert ( # nosec B101 + preview_oidc_step["with"]["aws-region"] + == "${{ steps.ci_config.outputs.aws-region }}" ) - assert preview_oidc_step["with"]["aws-region"] == "${{ env.AWS_REGION }}" # nosec B101 assert "if" not in iam_oidc_step # nosec B101 - assert iam_oidc_step["with"]["role-to-assume"] == "${{ env.AWS_PREVIEW_ROLE_ARN }}" # nosec B101 - assert 
iam_oidc_step["with"]["aws-region"] == "${{ env.AWS_REGION }}" # nosec B101 - assert iam_oidc_step["with"]["allowed-account-ids"] == "${{ env.AWS_ACCOUNT_ID }}" # nosec B101 + assert ( # nosec B101 + iam_oidc_step["with"]["role-to-assume"] + == "${{ steps.ci_config.outputs.aws-preview-role-arn }}" + ) + assert ( # nosec B101 + iam_oidc_step["with"]["aws-region"] + == "${{ steps.ci_config.outputs.aws-region }}" + ) + assert ( # nosec B101 + iam_oidc_step["with"]["allowed-account-ids"] + == "${{ steps.ci_config.outputs.aws-account-id }}" + ) assert "make publish-pulumi-preview-summary" in preview_run_step["run"] # nosec B101 assert "make test-preview-unprivileged" in unprivileged_preview_run # nosec B101 assert preview_run_step["env"] == { # nosec B101 @@ -331,6 +365,16 @@ def test_nightly_guardrails_workflow_covers_drift_and_scorecard() -> None: ] test_drift_steps = jobs["test_drift_detection"]["steps"] prod_drift_steps = jobs["prod_drift_detection"]["steps"] + test_ci_config_step = next( + step + for step in test_drift_steps + if step.get("uses") == "./.github/actions/load-aws-ci-env" + ) + prod_ci_config_step = next( + step + for step in prod_drift_steps + if step.get("uses") == "./.github/actions/load-aws-ci-env" + ) preflight_step = next( ( step @@ -351,16 +395,16 @@ def test_nightly_guardrails_workflow_covers_drift_and_scorecard() -> None: jobs["prod_drift_detection"]["concurrency"]["group"] == "bootstrap-infrastructure-prod-state" ) - assert jobs["test_drift_detection"]["environment"] == "test" # nosec B101 - assert jobs["prod_drift_detection"]["environment"] == "prod-preview" # nosec B101 + assert "environment" not in jobs["test_drift_detection"] # nosec B101 + assert "environment" not in jobs["prod_drift_detection"] # nosec B101 + assert test_ci_config_step["with"]["environment"] == "test" # nosec B101 + assert prod_ci_config_step["with"]["environment"] == "prod-preview" # nosec B101 expected_drift_permissions = { "contents": "read", "id-token": "write", } - 
expected_expression = "".join(("${{ secrets.", "PULUMI_ACCESS_", "TOKEN", " }}")) assert jobs["test_drift_detection"]["permissions"] == expected_drift_permissions # nosec B101 - drift_access_token = jobs["test_drift_detection"]["env"]["PULUMI_ACCESS_TOKEN"] - assert drift_access_token == expected_expression # nosec B101 + assert jobs["test_drift_detection"]["env"] == {"PULUMI_SKIP_UPDATE_CHECK": "true"} # nosec B101 assert preflight_step is not None, "drift preflight step not found" # nosec B101 assert "AWS_DRIFT_ROLE_ARN" in preflight_step["run"] # nosec B101 assert "PULUMI_BACKEND_URL" in preflight_step["run"] # nosec B101 @@ -373,9 +417,18 @@ def test_nightly_guardrails_workflow_covers_drift_and_scorecard() -> None: for step in drift_steps if step.get("uses", "").startswith("aws-actions/configure-aws-credentials@") ) - assert oidc_step["with"]["role-to-assume"] == "${{ env.AWS_DRIFT_ROLE_ARN }}" # nosec B101 - assert oidc_step["with"]["aws-region"] == "${{ env.AWS_REGION }}" # nosec B101 - assert oidc_step["with"]["allowed-account-ids"] == "${{ env.AWS_ACCOUNT_ID }}" # nosec B101 + assert ( # nosec B101 + oidc_step["with"]["role-to-assume"] + == "${{ steps.ci_config.outputs.aws-drift-role-arn }}" + ) + assert ( # nosec B101 + oidc_step["with"]["aws-region"] + == "${{ steps.ci_config.outputs.aws-region }}" + ) + assert ( # nosec B101 + oidc_step["with"]["allowed-account-ids"] + == "${{ steps.ci_config.outputs.aws-account-id }}" + ) assert any(step.get("run") == "make test-drift" for step in drift_steps) # nosec B101 assert any("ossf/scorecard-action@" in uses for uses in scorecard_uses) # nosec B101 assert any("upload-sarif@" in uses for uses in scorecard_uses) # nosec B101 @@ -403,6 +456,16 @@ def test_well_architected_evidence_workflow_uploads_enforced_reports() -> None: for step in evidence_steps if step.get("uses", "").startswith("aws-actions/configure-aws-credentials@") ) + ci_config_step = next( + step + for step in evidence_steps + if step.get("uses") == 
"./.github/actions/load-aws-ci-env" + ) + ci_config_target_step = next( + step + for step in evidence_steps + if step.get("name") == "Select test AWS CI configuration" + ) checkout_step = next( step for step in evidence_steps @@ -444,7 +507,20 @@ def test_well_architected_evidence_workflow_uploads_enforced_reports() -> None: "privileged": "${{ steps.evidence_mode.outputs.privileged }}" } assert "Fork pull request detected" in mode_step["run"] # nosec B101 - assert jobs["test_account_evidence"]["environment"] == "test" # nosec B101 + assert "environment" not in jobs["test_account_evidence"] # nosec B101 + assert ( # nosec B101 + ci_config_step["with"]["environment"] + == "${{ steps.ci_config_target.outputs.environment }}" + ) + assert ( # nosec B101 + ci_config_step["with"]["config-role-arn"] + == "${{ steps.ci_config_target.outputs.config-role-arn }}" + ) + assert "AWS_TEST_PR_CI_CONFIG_ROLE_ARN" in ci_config_target_step["run"] # nosec B101 + assert "AWS_TEST_CI_CONFIG_ROLE_ARN" in ci_config_target_step["run"] # nosec B101 + assert "must be set" in ci_config_target_step["run"] # nosec B101 + assert "||" not in ci_config_step["with"]["config-role-arn"] # nosec B101 + assert "OPERATIONS_TOPIC_ARN" in ci_config_step["with"]["required-keys"] # nosec B101 assert jobs["test_account_evidence"]["permissions"] == { # nosec B101 "contents": "read", "id-token": "write", @@ -505,8 +581,14 @@ def test_well_architected_evidence_workflow_uploads_enforced_reports() -> None: jobs["test_account_evidence"]["env"]["PRODUCTION_DR_OWNER_EVIDENCE"] == production_dr_evidence ) - assert oidc_step["with"]["role-to-assume"] == "${{ env.AWS_PREVIEW_ROLE_ARN }}" # nosec B101 - assert oidc_step["with"]["allowed-account-ids"] == "${{ env.AWS_ACCOUNT_ID }}" # nosec B101 + assert ( # nosec B101 + oidc_step["with"]["role-to-assume"] + == "${{ steps.ci_config.outputs.aws-preview-role-arn }}" + ) + assert ( # nosec B101 + oidc_step["with"]["allowed-account-ids"] + == "${{ 
steps.ci_config.outputs.aws-account-id }}" + ) assert "uv==0.9.21" in " ".join( # nosec B101 step.get("run", "") for step in evidence_steps ) @@ -590,6 +672,8 @@ def test_new_workflows_keep_actions_pinned_to_full_shas() -> None: uses = step.get("uses") if uses is None: continue + if uses.startswith("./"): + continue assert ACTION_SHA_REF.match(uses), ( f"{workflow_name} must pin `{uses}` to a full commit SHA" ) diff --git a/tests/pulumi/test_delivery_contracts.py b/tests/pulumi/test_delivery_contracts.py index 024b3f6..1d90fc5 100644 --- a/tests/pulumi/test_delivery_contracts.py +++ b/tests/pulumi/test_delivery_contracts.py @@ -10,6 +10,9 @@ PROJECT_ROOT = Path(__file__).resolve().parents[2] WORKFLOWS_DIR = PROJECT_ROOT / ".github" / "workflows" +AWS_CI_LOADER_ACTION = ( + PROJECT_ROOT / ".github" / "actions" / "load-aws-ci-env" / "action.yml" +) DOCKERFILE = PROJECT_ROOT / "Dockerfile" DOCKER_COMPOSE = PROJECT_ROOT / "docker-compose.yml" SECRETS_DOC = PROJECT_ROOT / "docs" / "github-actions-secrets.md" @@ -234,7 +237,6 @@ def test_docker_compose_keeps_workspace_and_credentials_contract() -> None: assert service["env_file"] == [{"path": ".env", "required": False}] assert service["environment"] == [ - "PULUMI_ACCESS_TOKEN", "PULUMI_BACKEND_URL", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", @@ -496,6 +498,9 @@ def test_makefile_keeps_pulumi_guardrails_secret_safe() -> None: assert "stack change-secrets-provider" not in pulumi_command_combined_text # nosec B101 assert '"--save-plan"' in pulumi_command_combined_text # nosec B101 assert '"summarize"' in pulumi_command_combined_text # nosec B101 + assert "direct Pulumi up is disabled in GitHub Actions" in ( # nosec B101 + pulumi_command_combined_text + ) def test_bats_suite_covers_every_public_make_target() -> None: @@ -704,97 +709,374 @@ def test_actions_are_pinned_to_full_commit_shas() -> None: ) -def test_multi_account_workflows_use_environment_scoped_oidc_contracts() -> None: - """Bind privileged AWS jobs to GitHub 
environments and account allow-lists.""" - expected_environments = {"test", "prod-preview", "prod"} - preview_role = "${{ env.AWS_PREVIEW_ROLE_ARN }}" - apply_role = "${{ env.AWS_APPLY_ROLE_ARN }}" - drift_role = "${{ env.AWS_DRIFT_ROLE_ARN }}" - alert_triage_role = "${{ env.AWS_OPERATIONS_ALERT_TRIAGE_ROLE_ARN }}" +def test_aws_ci_loader_reads_secrets_manager_without_pulumi_cloud() -> None: + """Load CI config directly from AWS Secrets Manager through GitHub OIDC.""" + action_text = AWS_CI_LOADER_ACTION.read_text(encoding="utf-8") + action = yaml.safe_load(action_text) + resolve_step = action["runs"]["steps"][0] + configure_aws_step = next( + step + for step in action["runs"]["steps"] + if step.get("name") == "Configure AWS config-read credentials" + ) + load_step = next( + step + for step in action["runs"]["steps"] + if step.get("name") == "Load AWS Secrets Manager CI values" + ) + install_uv_step = next( + step + for step in action["runs"]["steps"] + if step.get("name") == "Install uv for validation" + ) + validate_step = next( + step + for step in action["runs"]["steps"] + if step.get("name") == "Validate AWS Secrets Manager CI environment" + ) + boundary_step = next( + step + for step in action["runs"]["steps"] + if step.get("name") == "Record AWS Secrets Manager source-of-truth boundary" + ) + + assert action["name"] == "Load AWS Secrets Manager CI environment" # nosec B101 + assert "organization" not in action["inputs"] # nosec B101 + assert "config-role-arn" in action["inputs"] # nosec B101 + assert "aws-region" in action["inputs"] # nosec B101 + assert "pulumi/auth-actions" not in action_text # nosec B101 + assert "pulumi/esc-action" not in action_text # nosec B101 + assert "PULUMI_ESC" not in action_text # nosec B101 + assert ".github/ci/pulumi-esc.json" not in action_text # nosec B101 + assert "secretsmanager get-secret-value" in load_step["run"] # nosec B101 + assert "--query SecretString" in load_step["run"] # nosec B101 + assert "Secret ID:" in 
boundary_step["run"] # nosec B101 + assert "Pulumi Cloud/ESC: not used" in boundary_step["run"] # nosec B101 + assert action["runs"]["steps"].index(resolve_step) < action["runs"]["steps"].index( # nosec B101 + boundary_step + ) + assert action["runs"]["steps"].index(boundary_step) < action["runs"]["steps"].index( # nosec B101 + configure_aws_step + ) + assert configure_aws_step["with"]["role-to-assume"] == ( # nosec B101 + "${{ inputs.config-role-arn }}" + ) + assert configure_aws_step["with"]["allowed-account-ids"] == ( # nosec B101 + "${{ steps.aws-target.outputs.config_account_id }}" + ) + assert "GITHUB_STEP_SUMMARY" in boundary_step["run"] # nosec B101 + assert "version" in install_uv_step["with"] # nosec B101 + assert ( # nosec B101 + "uv run python scripts/validate_ci_environment.py" in validate_step["run"] + ) + assert "--purpose" in validate_step["run"] # nosec B101 + assert ( # nosec B101 + "python3 scripts/validate_ci_environment.py" not in validate_step["run"] + ) + + +def test_multi_account_workflows_use_fixed_aws_ci_config_contracts() -> None: + """Load privileged CI config from fixed AWS Secrets Manager secrets.""" + test_pr_environment = "${{ steps.ci_config_target.outputs.environment }}" expected_contracts_by_job = { - ("nightly-guardrails.yml", "test_drift_detection"): ("test", drift_role), + ("nightly-guardrails.yml", "test_drift_detection"): ( + "test", + "${{ steps.ci_config.outputs.aws-drift-role-arn }}", + ), ("nightly-guardrails.yml", "prod_drift_detection"): ( "prod-preview", - drift_role, + "${{ steps.ci_config.outputs.aws-drift-role-arn }}", ), ("well-architected-evidence.yml", "test_account_evidence"): ( - "test", - preview_role, + test_pr_environment, + "${{ steps.ci_config.outputs.aws-preview-role-arn }}", ), ("operations-alert-triage.yml", "triage_operations_alerts"): ( "test", - alert_triage_role, + "${{ steps.ci_config.outputs.aws-operations-alert-triage-role-arn }}", + ), + ("pulumi-pr-command-runner.yml", "test_preview"): ( + 
"test", + "${{ steps.ci_config.outputs.aws-preview-role-arn }}", ), - ("pulumi-pr-command-runner.yml", "test_preview"): ("test", preview_role), ("pulumi-pr-command-runner.yml", "test_iam_validation"): ( "test", - preview_role, + "${{ steps.ci_config.outputs.aws-preview-role-arn }}", + ), + ("pulumi-pr-command-runner.yml", "test_apply"): ( + "test", + "${{ steps.ci_config.outputs.aws-apply-role-arn }}", ), - ("pulumi-pr-command-runner.yml", "test_apply"): ("test", apply_role), ("pulumi-pr-command-runner.yml", "test_post_apply_drift"): ( "test", - drift_role, + "${{ steps.ci_config.outputs.aws-drift-role-arn }}", ), ("pulumi-pr-command-runner.yml", "prod_preview"): ( "prod-preview", - preview_role, + "${{ steps.ci_config.outputs.aws-preview-role-arn }}", ), ("pulumi-pr-command-runner.yml", "prod_iam_validation"): ( "prod-preview", - preview_role, + "${{ steps.ci_config.outputs.aws-preview-role-arn }}", + ), + ("pulumi-pr-command-runner.yml", "prod_apply"): ( + "prod", + "${{ steps.ci_config.outputs.aws-apply-role-arn }}", ), - ("pulumi-pr-command-runner.yml", "prod_apply"): ("prod", apply_role), ("pulumi-pr-command-runner.yml", "prod_post_apply_drift"): ( "prod-preview", - drift_role, + "${{ steps.ci_config.outputs.aws-drift-role-arn }}", + ), + ("pulumi-pr-guardrails.yml", "preview"): ( + test_pr_environment, + "${{ steps.ci_config.outputs.aws-preview-role-arn }}", + ), + ("pulumi-pr-guardrails.yml", "iam_validation"): ( + test_pr_environment, + "${{ steps.ci_config.outputs.aws-preview-role-arn }}", + ), + ("pulumi-prod.yml", "preview"): ( + "prod-preview", + "${{ steps.ci_config.outputs.aws-preview-role-arn }}", + ), + ("pulumi-prod.yml", "iam_validation"): ( + "prod-preview", + "${{ steps.ci_config.outputs.aws-preview-role-arn }}", + ), + ("pulumi-prod.yml", "apply"): ( + "prod", + "${{ steps.ci_config.outputs.aws-apply-role-arn }}", + ), + ("pulumi-prod.yml", "post_apply_drift"): ( + "prod-preview", + "${{ steps.ci_config.outputs.aws-drift-role-arn }}", + ), + 
("pulumi-test-deploy.yml", "preview"): ( + "test", + "${{ steps.ci_config.outputs.aws-preview-role-arn }}", + ), + ("pulumi-test-deploy.yml", "iam_validation"): ( + "test", + "${{ steps.ci_config.outputs.aws-preview-role-arn }}", + ), + ("pulumi-test-deploy.yml", "apply"): ( + "test", + "${{ steps.ci_config.outputs.aws-apply-role-arn }}", + ), + ("pulumi-test-deploy.yml", "post_apply_drift"): ( + "test", + "${{ steps.ci_config.outputs.aws-drift-role-arn }}", ), - ("pulumi-pr-guardrails.yml", "preview"): ("test", preview_role), - ("pulumi-pr-guardrails.yml", "iam_validation"): ("test", preview_role), - ("pulumi-prod.yml", "preview"): ("prod-preview", preview_role), - ("pulumi-prod.yml", "iam_validation"): ("prod-preview", preview_role), - ("pulumi-prod.yml", "apply"): ("prod", apply_role), - ("pulumi-prod.yml", "post_apply_drift"): ("prod-preview", drift_role), - ("pulumi-test-deploy.yml", "preview"): ("test", preview_role), - ("pulumi-test-deploy.yml", "iam_validation"): ("test", preview_role), - ("pulumi-test-deploy.yml", "apply"): ("test", apply_role), - ("pulumi-test-deploy.yml", "post_apply_drift"): ("test", drift_role), } - environment_jobs = [ - (workflow_name, job_name, job, environment_name) - for workflow_name, job_name, job in _workflow_jobs() - if (environment_name := _environment_name(job)) is not None - ] + approval_only_environment_jobs = { + ("pulumi-prod.yml", "apply"), + ("pulumi-pr-command-runner.yml", "prod_apply"), + } + forbidden_job_env_keys = { + "AWS_ACCOUNT_ID", + "AWS_REGION", + "AWS_DEFAULT_REGION", + "AWS_PREVIEW_ROLE_ARN", + "AWS_APPLY_ROLE_ARN", + "AWS_DRIFT_ROLE_ARN", + "AWS_OPERATIONS_ALERT_TRIAGE_ROLE_ARN", + "PULUMI_BACKEND_URL", + "PULUMI_SECRETS_PROVIDER", + "PULUMI_PREVIEW_STACKS", + "PULUMI_DRIFT_STACKS", + "PULUMI_ACCESS_TOKEN", + } + expected_config_role_by_environment = { + test_pr_environment: "${{ steps.ci_config_target.outputs.config-role-arn }}", + "test": "${{ vars.AWS_TEST_CI_CONFIG_ROLE_ARN }}", + "prod-preview": "${{ 
vars.AWS_PROD_PREVIEW_CI_CONFIG_ROLE_ARN }}", + "prod": "${{ vars.AWS_PROD_CI_CONFIG_ROLE_ARN }}", + } + expected_region_by_environment = { + test_pr_environment: "${{ vars.AWS_TEST_REGION }}", + "test": "${{ vars.AWS_TEST_REGION }}", + "prod-preview": "${{ vars.AWS_PROD_REGION }}", + "prod": "${{ vars.AWS_PROD_REGION }}", + } - assert {item[3] for item in environment_jobs} == expected_environments # nosec B101 + for workflow_name, job_name, job in _workflow_jobs(): + workflow_job = (workflow_name, job_name) + environment_name = _environment_name(job) + if workflow_job in approval_only_environment_jobs: + assert environment_name == "prod" # nosec B101 + elif workflow_job in expected_contracts_by_job: + assert environment_name is None # nosec B101 - for workflow_name, job_name, job, environment_name in environment_jobs: - assert environment_name in expected_environments # nosec B101 + if workflow_job not in expected_contracts_by_job: + continue job_env = job.get("env", {}) - role_message = ( - f"{workflow_name}:{job_name} must use environment-scoped " - "purpose-specific role variables directly from the OIDC step" + assert not forbidden_job_env_keys.intersection(job_env), workflow_job + + expected_ci_environment, expected_role = expected_contracts_by_job[workflow_job] + ci_config_step = next( + step + for step in job.get("steps", []) + if step.get("uses") == "./.github/actions/load-aws-ci-env" + ) + if expected_ci_environment == test_pr_environment: + ci_config_target_step = next( + step + for step in job.get("steps", []) + if step.get("name") == "Select test AWS CI configuration" + ) + assert ( + "AWS_TEST_PR_CI_CONFIG_ROLE_ARN" + in ( # nosec B101 + ci_config_target_step["run"] + ) + ) + assert ( + "AWS_TEST_CI_CONFIG_ROLE_ARN" + in ( # nosec B101 + ci_config_target_step["run"] + ) + ) + assert "must be set" in ci_config_target_step["run"] # nosec B101 + assert ci_config_step["id"] == "ci_config" # nosec B101 + assert ci_config_step["with"]["environment"] == 
expected_ci_environment # nosec B101 + assert ( + ci_config_step["with"]["config-role-arn"] + == ( # nosec B101 + expected_config_role_by_environment[expected_ci_environment] + ) ) - assert "AWS_OIDC_ROLE_ARN" not in job_env, role_message # nosec B101 + assert "||" not in ci_config_step["with"]["config-role-arn"] # nosec B101 + assert ( + ci_config_step["with"]["aws-region"] + == ( # nosec B101 + expected_region_by_environment[expected_ci_environment] + ) + ) + assert "organization" not in ci_config_step["with"] # nosec B101 + assert "/" not in ci_config_step["with"]["environment"] # nosec B101 + assert "inputs." not in ci_config_step["with"]["environment"] # nosec B101 + assert "client_payload" not in ci_config_step["with"]["environment"] # nosec B101 + required_keys = ci_config_step["with"]["required-keys"] + for required_key in ("AWS_ACCOUNT_ID", "AWS_REGION"): + assert required_key in required_keys # nosec B101 + assert "PULUMI_SECRETS_PROVIDER" not in job_env # nosec B101 + assert job.get("permissions", {}).get("id-token") == "write" # nosec B101 oidc_steps = [ step for step in job.get("steps", []) if step.get("uses", "").startswith("aws-actions/configure-aws-credentials@") ] - if not oidc_steps: - continue - - expected_environment, expected_role = expected_contracts_by_job[ - (workflow_name, job_name) - ] - assert environment_name == expected_environment # nosec B101 - assert job.get("permissions", {}).get("id-token") == "write" # nosec B101 + assert oidc_steps, workflow_job for step in oidc_steps: step_with = step["with"] assert step_with["role-to-assume"] == expected_role # nosec B101 - assert step_with["aws-region"] == "${{ env.AWS_REGION }}" # nosec B101 - assert step_with["allowed-account-ids"] == "${{ env.AWS_ACCOUNT_ID }}" # nosec B101 + assert ( + step_with["aws-region"] == "${{ steps.ci_config.outputs.aws-region }}" + ) # nosec B101 + assert step_with["allowed-account-ids"] == ( # nosec B101 + "${{ steps.ci_config.outputs.aws-account-id }}" + ) + + 
+def test_operations_alert_triage_uses_repo_python_runner() -> None: + """Keep alert rendering on the repo-managed Python command path.""" + workflow = yaml.safe_load( + (WORKFLOWS_DIR / "operations-alert-triage.yml").read_text(encoding="utf-8") + ) + steps = workflow["jobs"]["triage_operations_alerts"]["steps"] + install_step = next( + step for step in steps if step.get("name") == "Install uv for triage renderer" + ) + triage_step = next( + step + for step in steps + if step.get("name") + == "Create or update GitHub issue for queued operations alerts" + ) + + assert "uv==0.9.21" in install_step["run"] # nosec B101 + assert "GITHUB_PATH" in install_step["run"] # nosec B101 + assert ( # nosec B101 + "uv run python scripts/operations_alert_triage.py" in triage_step["run"] + ) + assert "--groups-file" in triage_step["run"] # nosec B101 + assert "jq '.groups | length'" in triage_step["run"] # nosec B101 + assert "for ((group_index = 0;" in triage_step["run"] # nosec B101 + assert "--visibility-timeout 600" in triage_step["run"] # nosec B101 + assert "python3 scripts/operations_alert_triage.py" not in triage_step["run"] # nosec B101 + + +def test_operations_alert_triage_searches_fingerprint_before_queue_delete() -> None: + """Update or create canonical alert issues before deleting SQS messages.""" + workflow = yaml.safe_load( + (WORKFLOWS_DIR / "operations-alert-triage.yml").read_text(encoding="utf-8") + ) + steps = workflow["jobs"]["triage_operations_alerts"]["steps"] + triage_run = next( + step["run"] + for step in steps + if step.get("name") + == "Create or update GitHub issue for queued operations alerts" + ) + + group_loop_index = triage_run.index("for ((group_index = 0;") + fingerprint_index = triage_run.index('fingerprint="$(cat "${fingerprint_file}")"') + search_index = triage_run.index( + '--search "operations-alert:fingerprint=${fingerprint} in:body"' + ) + comment_index = triage_run.index("gh issue comment") + create_index = triage_run.index("gh issue 
create") + receipt_index = triage_run.index("jq -r '.Messages[].ReceiptHandle'") + delete_index = triage_run.index("aws sqs delete-message") + + assert group_loop_index < search_index # nosec B101 + assert fingerprint_index < search_index # nosec B101 + assert search_index < comment_index < receipt_index < delete_index # nosec B101 + assert search_index < create_index < receipt_index < delete_index # nosec B101 + assert '--repo "${GITHUB_REPOSITORY_NAME}"' in triage_run # nosec B101 + assert "--state open" in triage_run # nosec B101 + assert "--json number" in triage_run # nosec B101 + assert "--jq '.[0].number // \"\"'" in triage_run # nosec B101 + assert 'existing_issue="$(' in triage_run # nosec B101 + assert 'if [[ -n "${existing_issue}" ]]; then' in triage_run # nosec B101 + assert '--body-file "${body_file}"' in triage_run # nosec B101 + assert ( # nosec B101 + '--title "Operations alerts queued: ${group_alert_count} message(s)"' + in triage_run + ) + assert triage_run.count("aws sqs delete-message") == 1 # nosec B101 + + +def test_operations_alert_backfill_requires_protected_manual_confirmation() -> None: + """Backfilled canonical alert issues must be protected and fingerprinted.""" + workflow = yaml.safe_load( + (WORKFLOWS_DIR / "operations-alert-backfill.yml").read_text(encoding="utf-8") + ) + triggers = _triggers(workflow) + job = workflow["jobs"]["backfill"] + run = job["steps"][1]["run"] + + assert "workflow_dispatch" in triggers # nosec B101 + assert len(triggers["workflow_dispatch"]["inputs"]) <= 10 # nosec B101 + assert "stable_event_json" in triggers["workflow_dispatch"]["inputs"] # nosec B101 + assert job["environment"] == "operations-alert-reconcile" # nosec B101 + assert workflow["permissions"] == {"contents": "read", "issues": "write"} # nosec B101 + assert "id-token" not in workflow["permissions"] # nosec B101 + assert job["steps"][0]["uses"] == ( # nosec B101 + "actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5" + ) + assert ( # nosec 
B101 + "I confirm these stable fields represent the canonical operations alert stream" + in run + ) + assert "sre_confirmation_reference must be an HTTPS URL" in run # nosec B101 + assert "stable_event_json.source is required" in run # nosec B101 + assert "python3 scripts/operations_alert_triage.py" in run # nosec B101 + assert "operations-alert:fingerprint=${fingerprint} in:body" in run # nosec B101 + assert "gh issue create" in run # nosec B101 + assert "gh issue comment" in run # nosec B101 def test_prod_workflow_requires_successful_test_deploy_for_same_sha() -> None: @@ -805,10 +1087,26 @@ def test_prod_workflow_requires_successful_test_deploy_for_same_sha() -> None: test_workflow = yaml.safe_load( (WORKFLOWS_DIR / "pulumi-test-deploy.yml").read_text(encoding="utf-8") ) - test_preview_env = test_workflow["jobs"]["preview"]["env"] - test_iam_env = test_workflow["jobs"]["iam_validation"]["env"] - test_apply_env = test_workflow["jobs"]["apply"]["env"] - test_drift_env = test_workflow["jobs"]["post_apply_drift"]["env"] + test_preview_ci_config = next( + step + for step in test_workflow["jobs"]["preview"]["steps"] + if step.get("uses") == "./.github/actions/load-aws-ci-env" + ) + test_iam_ci_config = next( + step + for step in test_workflow["jobs"]["iam_validation"]["steps"] + if step.get("uses") == "./.github/actions/load-aws-ci-env" + ) + test_apply_ci_config = next( + step + for step in test_workflow["jobs"]["apply"]["steps"] + if step.get("uses") == "./.github/actions/load-aws-ci-env" + ) + test_drift_ci_config = next( + step + for step in test_workflow["jobs"]["post_apply_drift"]["steps"] + if step.get("uses") == "./.github/actions/load-aws-ci-env" + ) prod_preview_lines = "\n".join( _run_lines(prod_workflow["jobs"]["preview"]["steps"]) ) @@ -834,38 +1132,14 @@ def test_prod_workflow_requires_successful_test_deploy_for_same_sha() -> None: assert "12-digit AWS account ID" in test_preview_lines # nosec B101 assert "s3:// backend" in test_preview_lines # nosec 
B101 assert "awskms:// URI" in test_preview_lines # nosec B101 - assert ( # nosec B101 - test_preview_env["PULUMI_BACKEND_URL"] - == "${{ vars.PULUMI_BACKEND_URL || vars.PULUMI_PR_BACKEND_URL }}" - ) - assert ( # nosec B101 - test_preview_env["PULUMI_PREVIEW_STACKS"] - == "${{ vars.PULUMI_PREVIEW_STACKS || vars.PULUMI_PR_PREVIEW_STACKS }}" - ) - assert ( # nosec B101 - test_preview_env["PULUMI_DRIFT_STACKS"] - == "${{ vars.PULUMI_DRIFT_STACKS || vars.PULUMI_PR_PREVIEW_STACKS }}" - ) - assert ( # nosec B101 - test_iam_env["PULUMI_BACKEND_URL"] - == "${{ vars.PULUMI_BACKEND_URL || vars.PULUMI_PR_BACKEND_URL }}" - ) - assert ( # nosec B101 - test_iam_env["PULUMI_PREVIEW_STACKS"] - == "${{ vars.PULUMI_PREVIEW_STACKS || vars.PULUMI_PR_PREVIEW_STACKS }}" - ) - assert ( # nosec B101 - test_apply_env["AWS_APPLY_ROLE_ARN"] - == "${{ vars.AWS_APPLY_ROLE_ARN || vars.AWS_PREVIEW_ROLE_ARN }}" - ) - assert ( # nosec B101 - test_apply_env["PULUMI_BACKEND_URL"] - == "${{ vars.PULUMI_BACKEND_URL || vars.PULUMI_PR_BACKEND_URL }}" - ) - assert ( # nosec B101 - test_drift_env["AWS_DRIFT_ROLE_ARN"] - == "${{ vars.AWS_DRIFT_ROLE_ARN || vars.AWS_PREVIEW_ROLE_ARN }}" - ) + assert test_preview_ci_config["with"]["environment"] == "test" # nosec B101 + assert "AWS_APPLY_ROLE_ARN" in test_preview_ci_config["with"]["required-keys"] # nosec B101 + assert "PULUMI_DRIFT_STACKS" in test_preview_ci_config["with"]["required-keys"] # nosec B101 + assert "PULUMI_BACKEND_URL" in test_iam_ci_config["with"]["required-keys"] # nosec B101 + assert "PULUMI_PREVIEW_STACKS" in test_iam_ci_config["with"]["required-keys"] # nosec B101 + assert "AWS_APPLY_ROLE_ARN" in test_apply_ci_config["with"]["required-keys"] # nosec B101 + assert "PULUMI_BACKEND_URL" in test_apply_ci_config["with"]["required-keys"] # nosec B101 + assert "AWS_DRIFT_ROLE_ARN" in test_drift_ci_config["with"]["required-keys"] # nosec B101 test_deploy_query = ( "pulumi-test-deploy.yml/runs?head_sha=${TARGET_SHA}" + 
"&status=completed&per_page=100" @@ -884,10 +1158,10 @@ def test_prod_workflow_requires_successful_test_deploy_for_same_sha() -> None: assert "make pulumi-plan" in prod_preview_lines # nosec B101 assert "make pulumi-plan" in test_preview_lines # nosec B101 assert "make pulumi-up-plan" in test_apply_lines # nosec B101 - assert "decrypting secret value: cipher: message authentication failed" in ( # nosec B101 + assert "decrypting secret value: cipher: message authentication failed" not in ( # nosec B101 test_apply_lines ) - assert re.search(r"(?m)^\s*make pulumi-up$", test_apply_lines) # nosec B101 + assert not re.search(r"(?m)^\s*make pulumi-up$", test_apply_lines) # nosec B101 assert "decrypting secret value: cipher: message authentication failed" not in ( # nosec B101 prod_apply_lines ) @@ -993,10 +1267,10 @@ def test_pr_comment_workflows_gate_prod_after_successful_test_apply() -> None: _run_lines(runner["jobs"]["test_iam_validation"]["steps"]) ) assert "make pulumi-up-plan" in test_apply_lines # nosec B101 - assert "decrypting secret value: cipher: message authentication failed" in ( # nosec B101 + assert "decrypting secret value: cipher: message authentication failed" not in ( # nosec B101 test_apply_lines ) - assert re.search(r"(?m)^\s*make pulumi-up$", test_apply_lines) # nosec B101 + assert not re.search(r"(?m)^\s*make pulumi-up$", test_apply_lines) # nosec B101 assert "make test-drift" in "\n".join( # nosec B101 _run_lines(runner["jobs"]["test_post_apply_drift"]["steps"]) ) @@ -1021,19 +1295,57 @@ def test_pr_comment_workflows_gate_prod_after_successful_test_apply() -> None: def test_multi_account_environment_docs_are_explicit() -> None: - """Document that account-specific CI config belongs to GitHub environments.""" + """Document AWS Secrets Manager-backed fixed CI configuration.""" docs = "\n".join( ( SECRETS_DOC.read_text(encoding="utf-8"), (PROJECT_ROOT / "docs" / "ci-guardrails.md").read_text(encoding="utf-8"), + (PROJECT_ROOT / ".github" / 
"github-actions-secrets.md").read_text( + encoding="utf-8" + ), + (PROJECT_ROOT / "docs" / "ci-architecture.md").read_text(encoding="utf-8"), + (PROJECT_ROOT / "docs" / "security-operating-evidence.md").read_text( + encoding="utf-8" + ), + (PROJECT_ROOT / "docs" / "sre-operations.md").read_text(encoding="utf-8"), + (PROJECT_ROOT / "docs" / "aws-secrets-manager-ci-cutover.md").read_text( + encoding="utf-8" + ), + ( + PROJECT_ROOT + / "specs" + / "issue-20-pulumi-esc-ci-config" + / "architecture.md" + ).read_text(encoding="utf-8"), ) ) normalized_docs = docs.lower() - assert "environment-scoped configuration" in normalized_docs # nosec B101 + assert "aws secrets manager" in normalized_docs # nosec B101 + assert "githubciconfigread" in normalized_docs # nosec B101 + assert "pulumi cloud and pulumi esc are not used" in normalized_docs # nosec B101 + assert "pulumi_access_token" in normalized_docs # nosec B101 + assert "put-secret-value" in normalized_docs # nosec B101 + assert "get-secret-value` for verification" in normalized_docs # nosec B101 + assert "aws_test_pr_ci_config_role_arn" in normalized_docs # nosec B101 + assert "aws_prod_preview_ci_config_role_arn" in normalized_docs # nosec B101 + assert "githubciconfigreadrolearns" in normalized_docs # nosec B101 + assert "pulumi-esc.json" not in normalized_docs # nosec B101 + assert "pulumi/auth-actions" not in normalized_docs # nosec B101 + assert "pulumi/esc-action" not in normalized_docs # nosec B101 + assert ( # nosec B101 + "account-configuration boundary is the pulumi esc environment" + not in normalized_docs + ) + assert ( # nosec B101 + "load privileged account configuration from the correct fixed esc environment" + not in normalized_docs + ) + assert "aws secrets manager is the account-configuration boundary" in ( # nosec B101 + normalized_docs + ) + assert "pulumiescsecretsreadrolearn" not in normalized_docs # nosec B101 assert "github environment" in normalized_docs # nosec B101 - for environment_name in 
("test", "prod-preview", "prod"): - assert environment_name in docs # nosec B101 for variable_name in ( "AWS_ACCOUNT_ID", "AWS_PREVIEW_ROLE_ARN", @@ -1044,6 +1356,13 @@ def test_multi_account_environment_docs_are_explicit() -> None: "PULUMI_SECRETS_PROVIDER", ): assert variable_name in docs # nosec B101 + for secret_id in ( + "/bootstrap-infrastructure/ci/test-pr", + "/bootstrap-infrastructure/ci/test", + "/bootstrap-infrastructure/ci/prod-preview", + "/bootstrap-infrastructure/ci/prod", + ): + assert secret_id in docs # nosec B101 def test_template_sync_workflows_keep_guardrails() -> None: diff --git a/tests/pulumi/test_project_structure.py b/tests/pulumi/test_project_structure.py index 1a86377..2c0fa25 100644 --- a/tests/pulumi/test_project_structure.py +++ b/tests/pulumi/test_project_structure.py @@ -128,6 +128,9 @@ def test_deploy_stack_exports_bootstrap_outputs() -> None: "pulumiSecretsAliases", "pulumiSecretsProviderUrls", "deployRoleArns", + "ciConfigurationSecretIds", + "ciConfigurationSecretArns", + "githubCiConfigReadRoleArns", "managedRepositoryProjects", "managedRepositoryMetadata", "backupVaultName", @@ -291,7 +294,8 @@ def test_docs_cover_current_testing_and_guardrail_guidance() -> None: "make ci-pr", "make ci", "make pulumi-preview", - "make pulumi-up", + "make pulumi-plan", + "make pulumi-up-plan", ): assert phrase in testing_doc # nosec B101 @@ -300,12 +304,261 @@ def test_alert_route_docs_keep_queue_depth_observation_only() -> None: """Avoid baking volatile SQS queue depth into retained review evidence.""" alert_doc = (ROOT / "docs" / "alert-routing-evidence.md").read_text() operating_doc = (ROOT / "docs" / "operating-review-2026-05-09.md").read_text() + ci_guardrails = (ROOT / "docs" / "ci-guardrails.md").read_text() + setup_doc = (ROOT / "docs" / "github-actions-secrets.md").read_text() + reconcile_workflow = yaml.safe_load( + (ROOT / ".github" / "workflows" / "operations-alert-reconcile.yml").read_text() + ) + backfill_workflow = 
yaml.safe_load( + (ROOT / ".github" / "workflows" / "operations-alert-backfill.yml").read_text() + ) + reconcile_triggers = reconcile_workflow.get("on", reconcile_workflow.get(True, {})) + backfill_triggers = backfill_workflow.get("on", backfill_workflow.get(True, {})) + reconcile_run = reconcile_workflow["jobs"]["reconcile"]["steps"][0]["run"] + backfill_run = backfill_workflow["jobs"]["backfill"]["steps"][1]["run"] docs = f"{alert_doc}\n{operating_doc}" assert "observation-only metadata" in alert_doc # nosec B101 + assert "Legacy operations-alert issues" in alert_doc # nosec B101 + assert "Operations Alert Canonical Backfill" in alert_doc # nosec B101 + assert "workflow searches issue bodies for the marker" in alert_doc # nosec B101 + assert "Operations Alert Legacy Reconcile" in alert_doc # nosec B101 + assert "operations-alert-reconcile" in alert_doc # nosec B101 + assert ( + "I confirm these stable fields represent the canonical operations alert stream" + in alert_doc + ) # nosec B101 + assert ( + "I confirm these legacy issues match the canonical operations alert stream" + in alert_doc + ) # nosec B101 + assert "sre_confirmation_reference" in alert_doc # nosec B101 + assert "sanitized SRE confirmation" in alert_doc # nosec B101 + assert "sre_confirmation_reference" in ci_guardrails # nosec B101 + assert "sre_confirmation_reference" in setup_doc # nosec B101 assert "stable SNS/SQS route metadata" in operating_doc # nosec B101 assert "ApproximateNumberOfMessages=" not in docs # nosec B101 assert "two visible messages" not in docs # nosec B101 + assert "workflow_dispatch" in reconcile_triggers # nosec B101 + assert reconcile_workflow["jobs"]["reconcile"]["environment"] == ( # nosec B101 + "operations-alert-reconcile" + ) + assert reconcile_workflow["permissions"] == { # nosec B101 + "contents": "read", + "issues": "write", + } + assert "id-token" not in reconcile_workflow["permissions"] # nosec B101 + assert "operations-alert:fingerprint=" in reconcile_run # 
nosec B101 + assert reconcile_triggers["workflow_dispatch"]["inputs"][ # nosec B101 + "sre_confirmation_reference" + ]["required"] + assert reconcile_workflow["jobs"]["reconcile"]["steps"][0]["shell"] == "bash" # nosec B101 + assert "workflow_dispatch" in backfill_triggers # nosec B101 + assert backfill_workflow["jobs"]["backfill"]["environment"] == ( # nosec B101 + "operations-alert-reconcile" + ) + assert backfill_workflow["permissions"] == { # nosec B101 + "contents": "read", + "issues": "write", + } + assert "id-token" not in backfill_workflow["permissions"] # nosec B101 + backfill_inputs = backfill_triggers["workflow_dispatch"]["inputs"] + assert len(backfill_inputs) <= 10 # nosec B101 + assert "stable_event_json" in backfill_inputs # nosec B101 + assert "sre_confirmation_reference" in backfill_run # nosec B101 + assert "stable_event_json must be an object" in backfill_run # nosec B101 + assert "operations-alert:fingerprint=${fingerprint} in:body" in backfill_run # nosec B101 + assert "python3 scripts/operations_alert_triage.py" in backfill_run # nosec B101 + assert "GH_REPO: ${{ github.repository }}" in yaml.safe_dump( # nosec B101 + reconcile_workflow["jobs"]["reconcile"]["env"] + ) + assert '--repo "${GH_REPO}"' in reconcile_run # nosec B101 + assert "declare -A seen_issues" in reconcile_run # nosec B101 + assert "legacy_issue_ids" in reconcile_run # nosec B101 + assert "provide at least one legacy issue number" in reconcile_run # nosec B101 + assert "SRE_CONFIRMATION_REFERENCE" in reconcile_run # nosec B101 + assert "sre_confirmation_reference must be an HTTPS URL" in reconcile_run # nosec B101 + assert "canonical_state" in reconcile_run # nosec B101 + assert "canonical_title" in reconcile_run # nosec B101 + assert "canonical issue ${canonical} is not open" in reconcile_run # nosec B101 + assert ( + "canonical issue ${canonical} is not an operations alert issue" in reconcile_run + ) # nosec B101 + assert "gh issue close" in reconcile_run # nosec B101 + 
assert "--duplicate-of" in reconcile_run # nosec B101 + assert "SRE confirmation reference: ${sre_reference}" in reconcile_run # nosec B101 + assert reconcile_run.index("sre_confirmation_reference must") < ( # nosec B101 + reconcile_run.index("gh issue close") + ) + + +def test_ci_guardrails_manual_follow_up_completes_aws_ci_cutover() -> None: + """Keep the issue 20 operator checklist aligned with the cleanup path.""" + ci_guardrails = (ROOT / "docs" / "ci-guardrails.md").read_text() + + assert "apply the Pulumi test and production stacks" in ci_guardrails # nosec B101 + assert "AWS Secrets Manager" in ci_guardrails # nosec B101 + assert "GitHubCiConfigRead" in ci_guardrails # nosec B101 + assert "GitHub Environment Legacy Variable Cleanup" in ci_guardrails # nosec B101 + assert "GH_ENVIRONMENT_ADMIN_TOKEN" in ci_guardrails # nosec B101 + assert "no stale AWS trust subjects" in ci_guardrails # nosec B101 + assert "protected `prod` approval boundary" in ci_guardrails # nosec B101 + assert "operations-alert-reconcile" in ci_guardrails # nosec B101 + + +def test_issue20_cutover_manual_is_secret_safe_and_actionable() -> None: + """Keep the human AWS-only cutover runbook explicit and source-of-truth safe.""" + manual = (ROOT / "docs" / "aws-secrets-manager-ci-cutover.md").read_text() + setup_doc = (ROOT / "docs" / "github-actions-secrets.md").read_text() + github_setup_doc = (ROOT / ".github" / "github-actions-secrets.md").read_text() + readme = (ROOT / "README.md").read_text() + docs_readme = (ROOT / "docs" / "README.md").read_text() + sre_operations = (ROOT / "docs" / "sre-operations.md").read_text() + security_baseline = (ROOT / "docs" / "security-baseline.md").read_text() + pulumi_guardrails = (ROOT / "docs" / "pulumi-guardrails.md").read_text() + env_dist = (ROOT / ".env.dist").read_text() + + for phrase in ( + "does not require Pulumi Cloud or Pulumi ESC", + "Required GitHub Variables", + "Required Secrets Manager Payloads", + "put-secret-value", + "Do not use 
`get-secret-value` for verification", + "Fix Local AWS CLI For Test", + "GitHubCiConfigRead", + "githubCiConfigReadRoleArns", + "GH_ENVIRONMENT_ADMIN_TOKEN", + "Operations Alert Legacy Reconcile", + "sre_confirmation_reference", + "I confirm AWS Secrets Manager-backed privileged CI is green", + "I confirm these legacy issues match the canonical operations alert stream", + ): + assert phrase in manual # nosec B101 + + for github_var in ( + "AWS_TEST_PR_CI_CONFIG_ROLE_ARN", + "AWS_TEST_CI_CONFIG_ROLE_ARN", + "AWS_PROD_PREVIEW_CI_CONFIG_ROLE_ARN", + "AWS_PROD_CI_CONFIG_ROLE_ARN", + ): + assert github_var in manual # nosec B101 + + for secret_id in ( + "/bootstrap-infrastructure/ci/test-pr", + "/bootstrap-infrastructure/ci/test", + "/bootstrap-infrastructure/ci/prod-preview", + "/bootstrap-infrastructure/ci/prod", + ): + assert secret_id in manual # nosec B101 + + for variable_name in ( + "AWS_ACCOUNT_ID", + "AWS_PREVIEW_ROLE_ARN", + "AWS_APPLY_ROLE_ARN", + "AWS_DRIFT_ROLE_ARN", + "AWS_OPERATIONS_ALERT_TRIAGE_ROLE_ARN", + "OPERATIONS_ALERT_QUEUE_NAME", + "OPERATIONS_TOPIC_ARN", + "OPERATIONS_CLOUDTRAIL_NAME", + "PULUMI_BACKEND_URL", + "PULUMI_SECRETS_PROVIDER", + "PULUMI_PREVIEW_STACKS", + "PULUMI_DRIFT_STACKS", + ): + assert variable_name in manual # nosec B101 + + assert "SecretAccessKey" not in manual # nosec B101 + assert "secretAccessKey" not in manual # nosec B101 + assert "PULUMI_ACCESS_TOKEN" not in env_dist # nosec B101 + for operator_doc in ( + readme, + docs_readme, + sre_operations, + security_baseline, + pulumi_guardrails, + ): + assert re.search(r"make pulumi-up(?!-)", operator_doc) is None # nosec B101 + assert "aws-secrets-manager-ci-cutover.md" in setup_doc # nosec B101 + assert "aws-secrets-manager-ci-cutover.md" in github_setup_doc # nosec B101 + assert "aws-secrets-manager-ci-cutover.md" in readme # nosec B101 + assert "aws-secrets-manager-ci-cutover.md" in docs_readme # nosec B101 + + +def 
test_issue20_closeout_evidence_tracks_external_manual_steps() -> None: + """Keep current issue 20 closeout evidence explicit and secret-safe.""" + closeout = ( + ROOT + / "specs" + / "issue-20-pulumi-esc-ci-config" + / "current-closeout-evidence-2026-05-25.md" + ).read_text() + + for phrase in ( + "AWS Secrets Manager remains the source of truth", + "Pulumi Cloud and Pulumi ESC are not used", + "AWS-only setup removes that Pulumi Cloud token exchange path", + "1470d28", + "`pulumi up` is disabled whenever `GITHUB_ACTIONS=true`", + "removes the silent pull-request fallback", + "PR config-read role variable is missing", + "invalid organization vilnacrm-org", + "access_key: env", + "config-role-arn must be an AWS IAM role ARN", + "Not authorized to perform sts:AssumeRoleWithWebIdentity", + "No test-account profile is configured locally yet", + "ResourceNotFoundException", + "NoSuchEntityException", + "canonical fingerprinted issue", + "SRE confirms", + "sre_confirmation_reference", + "Manual secure setup required", + "Generated BMAD/BMALPH/Ralph framework state remains intentionally uncommitted", + ): + assert phrase in closeout # nosec B101 + + for issue in ("#20", "#49", "#50", "#52", "#53", "#54", "#55", "#56", "#58"): + assert issue in closeout # nosec B101 + + assert "No secret values" in closeout # nosec B101 + assert "SecretAccessKey" not in closeout # nosec B101 + + +def test_github_environment_cleanup_is_manual_and_guarded() -> None: + """Keep post-AWS-cutover GitHub Environment cleanup explicit and non-AWS.""" + cleanup_workflow = yaml.safe_load( + ( + ROOT / ".github" / "workflows" / "github-environment-legacy-cleanup.yml" + ).read_text() + ) + cleanup_triggers = cleanup_workflow.get("on", cleanup_workflow.get(True, {})) + cleanup_run = cleanup_workflow["jobs"]["cleanup"]["steps"][0]["run"] + setup_doc = (ROOT / "docs" / "github-actions-secrets.md").read_text() + + assert "workflow_dispatch" in cleanup_triggers # nosec B101 + assert 
cleanup_workflow["permissions"] == { # nosec B101 + "contents": "read", + } + assert "actions" not in cleanup_workflow["permissions"] # nosec B101 + assert "id-token" not in cleanup_workflow["permissions"] # nosec B101 + assert cleanup_triggers["workflow_dispatch"]["inputs"]["dry_run"][ # nosec B101 + "default" + ] + assert cleanup_workflow["jobs"]["cleanup"]["steps"][0]["shell"] == "bash" # nosec B101 + assert "GH_ENVIRONMENT_ADMIN_TOKEN" in cleanup_run # nosec B101 + assert "github.token" not in yaml.safe_dump(cleanup_workflow) # nosec B101 + assert "legacy GitHub Environment variables can be removed" in cleanup_run # nosec B101 + assert "gh variable delete" in cleanup_run # nosec B101 + assert '--env "${environment_name}"' in cleanup_run # nosec B101 + assert "AWS_ACCOUNT_ID" in cleanup_run # nosec B101 + assert "AWS_OPERATIONS_ALERT_TRIAGE_ROLE_ARN" in cleanup_run # nosec B101 + assert "PULUMI_BACKEND_URL" in cleanup_run # nosec B101 + assert "PULUMI_PR_BACKEND_URL" in cleanup_run # nosec B101 + assert "PULUMI_PR_PREVIEW_STACKS" in cleanup_run # nosec B101 + assert "remaining_names" in cleanup_run # nosec B101 + assert "GitHub Environment Legacy Variable Cleanup" in setup_doc # nosec B101 + assert "repository **Environments** write permission" in setup_doc # nosec B101 + assert "`PULUMI_PR_*`" in setup_doc # nosec B101 + assert "dry_run=false" in setup_doc # nosec B101 def test_completion_audit_avoids_self_stale_exact_head_metadata() -> None: diff --git a/tests/unit/test_components.py b/tests/unit/test_components.py index ac87c68..e5aa5ac 100644 --- a/tests/unit/test_components.py +++ b/tests/unit/test_components.py @@ -8,6 +8,7 @@ BootstrapInfrastructure, BootstrapInfrastructureDependencies, CentralLoggingBuckets, + CiConfiguration, CostControlInputs, CostControls, GitHubAutomation, @@ -18,6 +19,7 @@ S3BackupPlan, SecurityAccountControls, automation, + ci_config, config, logging_bucket, operations_monitoring, @@ -218,6 +220,261 @@ def 
test_github_automation_policy_normalizes_sns_environment_and_allocation_tags ) +def test_github_automation_trust_keeps_environment_subject_prod_only(): + provider_arn = ( + "arn:aws:iam::123456789012:oidc-provider/token.actions.githubusercontent.com" + ) + + test_policy = json.loads( + automation._automation_assume_role_policy( + provider_arn, + "VilnaCRM-Org", + "bootstrap-infrastructure", + "test", + "main", + ) + ) + prod_policy = json.loads( + automation._automation_assume_role_policy( + provider_arn, + "VilnaCRM-Org", + "bootstrap-infrastructure", + "prod", + "main", + ) + ) + + test_subjects = test_policy["Statement"][0]["Condition"]["StringEquals"][ + "token.actions.githubusercontent.com:sub" + ] + prod_subjects = prod_policy["Statement"][0]["Condition"]["StringEquals"][ + "token.actions.githubusercontent.com:sub" + ] + + assert test_subjects == [ # nosec B101 + "repo:VilnaCRM-Org/bootstrap-infrastructure:ref:refs/heads/main", + "repo:VilnaCRM-Org/bootstrap-infrastructure:pull_request", + ] + assert prod_subjects == [ # nosec B101 + "repo:VilnaCRM-Org/bootstrap-infrastructure:environment:prod" + ] + + +def test_ci_configuration_manages_aws_secret_containers_and_github_read_roles( + pulumi_mocks, + monkeypatch, +): # noqa: ARG001 + monkeypatch.setattr(ci_config, "_secret_exists", lambda _name: False) + monkeypatch.setattr(ci_config, "_iam_role_exists", lambda _name: False) + provider_arn = ( + "arn:aws:iam::123456789012:oidc-provider/token.actions.githubusercontent.com" + ) + settings = config.BootstrapSettings( + org="VilnaCRM-Org", + repo="bootstrap-infrastructure", + environment="test", + owner="platform", + cost_center="core", + data_classification="internal", + criticality="high", + retention_class="standard", + github_branch="main", + logging_prefix="company", + replication_region=None, + github_token=None, + github_oidc_provider_arn=None, + ) + + start = len(pulumi_mocks.resources) + component = CiConfiguration( + "ci-configuration", + 
settings=settings, + oidc_provider_arn=provider_arn, + ) + + _sync_await(future_output(component.secret_arns["test-pr"])) + _sync_await(future_output(component.secret_arns["test"])) + test_pr_role_arn = _sync_await(future_output(component.read_role_arns["test-pr"])) + test_role_arn = _sync_await(future_output(component.read_role_arns["test"])) + assert test_pr_role_arn.endswith( # nosec B101 + ":role/GitHubCiConfigRead-bootstrap-infrastructure-test-pr" + ) + assert test_role_arn.endswith( # nosec B101 + ":role/GitHubCiConfigRead-bootstrap-infrastructure-test" + ) + assert component.secret_ids == { # nosec B101 + "test-pr": "/bootstrap-infrastructure/ci/test-pr", + "test": "/bootstrap-infrastructure/ci/test", + } + + new_resources = pulumi_mocks.resources[start:] + secret_states = { + state["name"]: state + for resource_type, _name, state in new_resources + if resource_type == "aws:secretsmanager/secret:Secret" + } + assert not any( # nosec B101 + resource_type == "aws:secretsmanager/secretVersion:SecretVersion" + for resource_type, _name, _state in new_resources + ) + assert set(secret_states) == { # nosec B101 + "/bootstrap-infrastructure/ci/test-pr", + "/bootstrap-infrastructure/ci/test", + } + assert all( # nosec B101 + state["tags"]["Purpose"] == "ci-configuration" + for state in secret_states.values() + ) + assert ( + secret_states["/bootstrap-infrastructure/ci/test-pr"]["tags"][ # nosec B101 + "CiConfigSuffix" + ] + == "test-pr" + ) + + test_pr_role_state = _resource_state_by_name( + pulumi_mocks, + "ci-configuration-github-ci-config-read-role-test-pr", + ) + test_role_state = _resource_state_by_name( + pulumi_mocks, + "ci-configuration-github-ci-config-read-role-test", + ) + test_pr_policy = json.loads(test_pr_role_state["assumeRolePolicy"]) + test_policy = json.loads(test_role_state["assumeRolePolicy"]) + test_pr_condition = test_pr_policy["Statement"][0]["Condition"] + test_condition = test_policy["Statement"][0]["Condition"] + assert 
test_pr_condition["StringEquals"] == { # nosec B101 + "token.actions.githubusercontent.com:aud": "sts.amazonaws.com", + "token.actions.githubusercontent.com:sub": [ + "repo:VilnaCRM-Org/bootstrap-infrastructure:pull_request" + ], + } + assert test_pr_condition["StringLike"][ # nosec B101 + "token.actions.githubusercontent.com:job_workflow_ref" + ] == [ + "VilnaCRM-Org/bootstrap-infrastructure/.github/workflows/pulumi-pr-guardrails.yml@refs/*", + "VilnaCRM-Org/bootstrap-infrastructure/.github/workflows/well-architected-evidence.yml@refs/*", + ] + assert test_condition["StringEquals"] == { # nosec B101 + "token.actions.githubusercontent.com:aud": "sts.amazonaws.com", + "token.actions.githubusercontent.com:sub": [ + "repo:VilnaCRM-Org/bootstrap-infrastructure:ref:refs/heads/main" + ], + } + + test_pr_policy_state = _resource_state_by_name( + pulumi_mocks, + "ci-configuration-github-ci-config-read-policy-test-pr", + ) + test_policy_state = _resource_state_by_name( + pulumi_mocks, + "ci-configuration-github-ci-config-read-policy-test", + ) + policy = json.loads(test_pr_policy_state["policy"]) + statement = policy["Statement"][0] + assert statement["Action"] == [ # nosec B101 + "secretsmanager:DescribeSecret", + "secretsmanager:GetSecretValue", + ] + assert statement["Resource"] == [ # nosec B101 + "arn:aws:secretsmanager:*:123456789012:secret:" + "/bootstrap-infrastructure/ci/test-pr-*" + ] + test_policy_document = json.loads(test_policy_state["policy"]) + assert ( # nosec B101 + "/bootstrap-infrastructure/ci/test" in json.dumps(test_policy_document) + ) + + +def test_ci_configuration_uses_github_oidc_provider_for_prod_suffixes( + pulumi_mocks, + monkeypatch, +): # noqa: ARG001 + provider_arn = ( + "arn:aws:iam::123456789012:oidc-provider/token.actions.githubusercontent.com" + ) + monkeypatch.setattr(ci_config, "_secret_exists", lambda _name: False) + monkeypatch.setattr(ci_config, "_iam_role_exists", lambda _name: False) + settings = config.BootstrapSettings( + 
org="VilnaCRM-Org", + repo="bootstrap-infrastructure", + environment="prod", + owner="platform", + cost_center="core", + data_classification="internal", + criticality="high", + retention_class="standard", + github_branch="main", + logging_prefix="company", + replication_region=None, + github_token=None, + github_oidc_provider_arn=None, + ) + + start = len(pulumi_mocks.resources) + component = CiConfiguration( + "ci-configuration-prod", + settings=settings, + oidc_provider_arn=provider_arn, + ) + + _sync_await(future_output(component.read_role_arns["prod-preview"])) + _sync_await(future_output(component.read_role_arns["prod"])) + provider_resources = { + name + for resource_type, name, _state in pulumi_mocks.resources[start:] + if resource_type == "aws:iam/openIdConnectProvider:OpenIdConnectProvider" + } + assert provider_resources == set() # nosec B101 + assert component.secret_ids == { # nosec B101 + "prod-preview": "/bootstrap-infrastructure/ci/prod-preview", + "prod": "/bootstrap-infrastructure/ci/prod", + } + preview_role_state = _resource_state_by_name( + pulumi_mocks, + "ci-configuration-prod-github-ci-config-read-role-prod-preview", + ) + prod_role_state = _resource_state_by_name( + pulumi_mocks, + "ci-configuration-prod-github-ci-config-read-role-prod", + ) + preview_policy = json.loads(preview_role_state["assumeRolePolicy"]) + prod_policy = json.loads(prod_role_state["assumeRolePolicy"]) + assert preview_policy["Statement"][0]["Condition"]["StringEquals"][ # nosec B101 + "token.actions.githubusercontent.com:sub" + ] == ["repo:VilnaCRM-Org/bootstrap-infrastructure:ref:refs/heads/main"] + assert prod_policy["Statement"][0]["Condition"]["StringEquals"][ # nosec B101 + "token.actions.githubusercontent.com:sub" + ] == ["repo:VilnaCRM-Org/bootstrap-infrastructure:environment:prod"] + + +def test_ci_configuration_requires_github_oidc_provider( + pulumi_mocks, + monkeypatch, +): # noqa: ARG001 + monkeypatch.setattr(ci_config, "_secret_exists", lambda _name: False) 
+ monkeypatch.setattr(ci_config, "_iam_role_exists", lambda _name: False) + settings = config.BootstrapSettings( + org="VilnaCRM-Org", + repo="bootstrap-infrastructure", + environment="test", + owner="platform", + cost_center="core", + data_classification="internal", + criticality="high", + retention_class="standard", + github_branch="main", + logging_prefix="company", + replication_region=None, + github_token=None, + github_oidc_provider_arn=None, + ) + + with pytest.raises(ValueError, match="githubOidcProviderArn config is required"): + CiConfiguration("ci-configuration-no-provider", settings=settings) + + def test_components_build(pulumi_mocks, monkeypatch): # noqa: ARG001 monkeypatch.setattr(config.settings, "logging_prefix", "company") monkeypatch.setattr(config.settings, "repo", "bootstrap-infrastructure") @@ -873,6 +1130,9 @@ def test_bootstrap_infrastructure_composes_catalog_and_di(pulumi_mocks, monkeypa assert "operationsAlertQueueSubscriptionArn" in bootstrap.outputs # nosec B101 assert "backupVaultArn" in bootstrap.outputs # nosec B101 assert "backupRoleArn" in bootstrap.outputs # nosec B101 + assert "ciConfigurationSecretIds" in bootstrap.outputs # nosec B101 + assert "ciConfigurationSecretArns" in bootstrap.outputs # nosec B101 + assert "githubCiConfigReadRoleArns" in bootstrap.outputs # nosec B101 assert "guardDutyDetectorId" in bootstrap.outputs # nosec B101 assert "securityHubAccountArn" in bootstrap.outputs # nosec B101 assert "awsConfigRecorderName" in bootstrap.outputs # nosec B101 @@ -1292,13 +1552,29 @@ def test_github_automation_emits_runner_repository_and_role(pulumi_mocks, monkey assert role_type == "aws:iam/role:Role" # nosec B101 assert triage_role_type == "aws:iam/role:Role" # nosec B101 assert ( - "repo:VilnaCRM-Org/bootstrap-infrastructure:environment:test" + "repo:VilnaCRM-Org/bootstrap-infrastructure:ref:refs/heads/main" in role_state["assumeRolePolicy"] ) # nosec B101 + assert ( # nosec B101 + 
"repo:VilnaCRM-Org/bootstrap-infrastructure:pull_request" + in role_state["assumeRolePolicy"] + ) assert ( # nosec B101 "repo:VilnaCRM-Org/bootstrap-infrastructure:environment:test" + not in role_state["assumeRolePolicy"] + ) + assert ( # nosec B101 + "repo:VilnaCRM-Org/bootstrap-infrastructure:ref:refs/heads/main" in triage_role_state["assumeRolePolicy"] ) + assert ( # nosec B101 + "repo:VilnaCRM-Org/bootstrap-infrastructure:environment:test" + not in triage_role_state["assumeRolePolicy"] + ) + assert ( # nosec B101 + "VilnaCRM-Org/bootstrap-infrastructure/.github/workflows/" + "pulumi-pr-guardrails.yml@refs/*" in role_state["assumeRolePolicy"] + ) assert ( # nosec B101 "VilnaCRM-Org/bootstrap-infrastructure/.github/workflows/" "operations-alert-triage.yml@refs/heads/main" @@ -1357,6 +1633,19 @@ def test_github_automation_emits_runner_repository_and_role(pulumi_mocks, monkey "arn:aws:iam::123456789012:role/OperationsAlertTriage-" "bootstrap-infrastructure-test" in statements["ManageBootstrapIam"]["Resource"] ) + assert ( # nosec B101 + "arn:aws:iam::123456789012:role/GitHubCiConfigRead-" + "bootstrap-infrastructure-test-pr" + in statements["ManageBootstrapIam"]["Resource"] + ) + assert ( # nosec B101 + "arn:aws:iam::123456789012:role/GitHubCiConfigRead-" + "bootstrap-infrastructure-test" in statements["ManageBootstrapIam"]["Resource"] + ) + assert ( # nosec B101 + "arn:aws:iam::123456789012:oidc-provider/api.pulumi.com/oidc" + not in statements["ManageBootstrapIam"]["Resource"] + ) assert statements["ManageBootstrapS3"]["Resource"] == [ # nosec B101 "arn:aws:s3:::pulumi-*-test-state", "arn:aws:s3:::pulumi-*-test-state-*-replication", @@ -1387,6 +1676,9 @@ def test_github_automation_emits_runner_repository_and_role(pulumi_mocks, monkey assert "cloudtrail:DescribeTrails" in all_actions # nosec B101 assert "sqs:ReceiveMessage" not in all_allow_actions # nosec B101 assert "sqs:DeleteMessage" not in all_allow_actions # nosec B101 + assert 
"secretsmanager:GetSecretValue" not in all_allow_actions # nosec B101 + assert "secretsmanager:PutSecretValue" not in all_allow_actions # nosec B101 + assert "secretsmanager:UpdateSecret" not in all_allow_actions # nosec B101 assert statements["ManageBootstrapEventBridge"]["Resource"] == [ # nosec B101 "arn:aws:events:*:123456789012:rule/bootstrap-test-*" ] @@ -1408,6 +1700,38 @@ def test_github_automation_emits_runner_repository_and_role(pulumi_mocks, monkey "sns:GetSubscriptionAttributes", "sns:Unsubscribe", ] + assert statements["CreateBootstrapCiSecrets"]["Resource"] == [ # nosec B101 + ( + "arn:aws:secretsmanager:*:123456789012:secret:" + "/bootstrap-infrastructure/ci/test-pr-*" + ), + ( + "arn:aws:secretsmanager:*:123456789012:secret:" + "/bootstrap-infrastructure/ci/test-*" + ), + ] + assert statements["CreateBootstrapCiSecrets"]["Condition"] == { # nosec B101 + "StringEquals": { + "aws:RequestTag/Environment": "test", + "aws:RequestTag/Purpose": "ci-configuration", + } + } + assert ( + statements["ManageBootstrapCiSecrets"]["Resource"] + == ( # nosec B101 + statements["CreateBootstrapCiSecrets"]["Resource"] + ) + ) + assert statements["CreateBootstrapCiSecrets"]["Action"] == [ # nosec B101 + "secretsmanager:CreateSecret", + "secretsmanager:TagResource", + ] + assert ( + "secretsmanager:TagResource" + in statements["ManageBootstrapCiSecrets"][ # nosec B101 + "Action" + ] + ) assert statements["DenyBootstrapSqsConsumption"] == { # nosec B101 "Sid": "DenyBootstrapSqsConsumption", "Effect": "Deny", diff --git a/tests/unit/test_mutation_targets.py b/tests/unit/test_mutation_targets.py index 22bda29..5ea6870 100644 --- a/tests/unit/test_mutation_targets.py +++ b/tests/unit/test_mutation_targets.py @@ -3,8 +3,9 @@ from types import SimpleNamespace import infra.automation as automation +import infra.bootstrap_infrastructure as bootstrap_infrastructure import pytest -from infra import config, pulumi_secrets +from infra import ci_config, config, pulumi_secrets from 
infra.iam import github_oidc import pulumi @@ -43,6 +44,220 @@ def test_mutation_target_adoption_helpers_detect_existing_resources(monkeypatch) assert pulumi_secrets._kms_alias_exists("alias/repo") is True # nosec B101 +def test_mutation_target_bootstrap_repository_project_fallback(): + repositories = [ + config.ManagedRepository( + name="core-service-infrastructure", + default_branch="main", + project="core-service", + ) + ] + + assert ( # nosec B101 + bootstrap_infrastructure._repository_project( + repositories, + "core-service-infrastructure", + ) + == "core-service" + ) + assert ( # nosec B101 + bootstrap_infrastructure._repository_project(repositories, "missing-repo") + == "missing-repo" + ) + + +def test_mutation_target_ci_config_secret_contract(): + settings = config.BootstrapSettings( + org="VilnaCRM-Org", + repo="bootstrap-infrastructure", + environment="test", + owner="platform", + cost_center="core", + data_classification="internal", + criticality="high", + retention_class="standard", + github_branch="main", + logging_prefix="company", + replication_region=None, + github_token=None, + github_oidc_provider_arn=None, + ) + + assert ci_config._ci_secret_suffixes("test") == ("test-pr", "test") # nosec B101 + assert ci_config._ci_secret_suffixes("prod") == ( # nosec B101 + "prod-preview", + "prod", + ) + assert ci_config._ci_secret_id(settings, "test") == ( # nosec B101 + "/bootstrap-infrastructure/ci/test" + ) + assert ci_config._github_actions_subjects(settings, "test-pr") == [ # nosec B101 + "repo:VilnaCRM-Org/bootstrap-infrastructure:pull_request" + ] + assert ci_config._github_actions_subjects(settings, "test") == [ # nosec B101 + "repo:VilnaCRM-Org/bootstrap-infrastructure:ref:refs/heads/main" + ] + + policy = json.loads( + ci_config._ci_config_read_policy( + account_id="123456789012", + partition="aws", + settings=settings, + suffixes=("test-pr", "test"), + ) + ) + statement = policy["Statement"][0] + assert statement["Action"] == [ # nosec B101 + 
"secretsmanager:DescribeSecret", + "secretsmanager:GetSecretValue", + ] + assert statement["Resource"] == [ # nosec B101 + ( + "arn:aws:secretsmanager:*:123456789012:secret:" + "/bootstrap-infrastructure/ci/test-pr-*" + ), + ( + "arn:aws:secretsmanager:*:123456789012:secret:" + "/bootstrap-infrastructure/ci/test-*" + ), + ] + + +def test_mutation_target_ci_config_validation_and_lookup_helpers(monkeypatch): + settings = config.BootstrapSettings( + org="VilnaCRM-Org", + repo="bootstrap-infrastructure", + environment="test", + owner="platform", + cost_center="core", + data_classification="internal", + criticality="high", + retention_class="standard", + github_branch="main", + logging_prefix="company", + replication_region=None, + github_token=None, + github_oidc_provider_arn=None, + ) + no_repo_settings = config.BootstrapSettings( + org="VilnaCRM-Org", + repo=None, + environment="test", + owner="platform", + cost_center="core", + data_classification="internal", + criticality="high", + retention_class="standard", + github_branch="main", + logging_prefix="company", + replication_region=None, + github_token=None, + github_oidc_provider_arn=None, + ) + long_role_settings = config.BootstrapSettings( + org="VilnaCRM-Org", + repo="a" * 50, + environment="test", + owner="platform", + cost_center="core", + data_classification="internal", + criticality="high", + retention_class="standard", + github_branch="main", + logging_prefix="company", + replication_region=None, + github_token=None, + github_oidc_provider_arn=None, + ) + + with pytest.raises(ValueError, match="repoSlug config is required"): + ci_config._ci_config_project(no_repo_settings) + with pytest.raises(ValueError, match="repoSlug config is required"): + ci_config._github_actions_subjects(no_repo_settings, "test") + with pytest.raises(ValueError, match="repoSlug config is required"): + ci_config._github_actions_workflow_refs(no_repo_settings, "test") + with pytest.raises(ValueError, match="longer than 64 
characters"): + ci_config._ci_config_read_role_name(long_role_settings, "test") + assert ci_config._ci_config_read_role_name(settings, "test") == ( # nosec B101 + "GitHubCiConfigRead-bootstrap-infrastructure-test" + ) + assert ci_config._github_actions_subjects(settings, "test-pr") == [ # nosec B101 + "repo:VilnaCRM-Org/bootstrap-infrastructure:pull_request" + ] + assert ci_config._github_actions_workflow_refs(settings, "test-pr") == [ # nosec B101 + ( + "VilnaCRM-Org/bootstrap-infrastructure/.github/workflows/" + "pulumi-pr-guardrails.yml@refs/*" + ), + ( + "VilnaCRM-Org/bootstrap-infrastructure/.github/workflows/" + "well-architected-evidence.yml@refs/*" + ), + ] + assert ci_config._github_actions_subjects(settings, "prod") == [ # nosec B101 + "repo:VilnaCRM-Org/bootstrap-infrastructure:environment:prod" + ] + trust_policy = json.loads( + ci_config._ci_config_read_assume_role_policy( + "arn:aws:iam::123456789012:oidc-provider/token.actions.githubusercontent.com", + settings, + "prod-preview", + ) + ) + assert trust_policy["Statement"][0]["Condition"]["StringEquals"][ # nosec B101 + "token.actions.githubusercontent.com:sub" + ] == ["repo:VilnaCRM-Org/bootstrap-infrastructure:ref:refs/heads/main"] + assert ci_config._is_missing_lookup_error( # nosec B101 + "reading KMS Alias: empty result", + (), + ) + assert not ci_config._is_missing_lookup_error("iam throttled", ()) # nosec B101 + + monkeypatch.setattr( + ci_config.aws.secretsmanager, + "get_secret", + lambda *, name: SimpleNamespace(arn=f"arn:aws:secretsmanager:::secret:{name}"), + ) + assert ci_config._secret_exists("present") is True # nosec B101 + monkeypatch.setattr( + ci_config.aws.secretsmanager, + "get_secret", + lambda *, name: SimpleNamespace(name=name), + ) + assert ci_config._secret_exists("without-arn") is False # nosec B101 + + def missing_secret(*, name): # noqa: ARG001 + raise RuntimeError("ResourceNotFoundException") + + def failing_secret(*, name): # noqa: ARG001 + raise 
RuntimeError("secretsmanager throttled") + + monkeypatch.setattr(ci_config.aws.secretsmanager, "get_secret", missing_secret) + assert ci_config._secret_exists("missing") is False # nosec B101 + monkeypatch.setattr(ci_config.aws.secretsmanager, "get_secret", failing_secret) + with pytest.raises(RuntimeError, match="secretsmanager throttled"): + ci_config._secret_exists("failing") + + monkeypatch.setattr( + ci_config.aws.iam, + "get_role", + lambda *, name: SimpleNamespace(arn=f"arn:aws:iam:::role/{name}"), + ) + assert ci_config._iam_role_exists("present") is True # nosec B101 + + def missing_role(*, name): # noqa: ARG001 + raise RuntimeError("NoSuchEntity") + + def failing_role(*, name): # noqa: ARG001 + raise RuntimeError("iam throttled") + + monkeypatch.setattr(ci_config.aws.iam, "get_role", missing_role) + assert ci_config._iam_role_exists("missing") is False # nosec B101 + monkeypatch.setattr(ci_config.aws.iam, "get_role", failing_role) + with pytest.raises(RuntimeError, match="iam throttled"): + ci_config._iam_role_exists("failing") + + def test_mutation_target_adoption_helpers_treat_not_found_as_absent(monkeypatch): def missing_ecr_repository(*, name): raise RuntimeError(f"RepositoryNotFoundException: {name}") @@ -253,6 +468,7 @@ def test_mutation_target_github_automation_policy_uses_explicit_actions(monkeypa assert "events:*" not in actions # nosec B101 assert "cloudtrail:*" not in actions # nosec B101 assert "sns:*" not in actions # nosec B101 + assert "secretsmanager:*" not in actions # nosec B101 assert "guardduty:*" not in actions # nosec B101 assert "securityhub:*" not in actions # nosec B101 assert "config:*" not in actions # nosec B101 @@ -267,6 +483,10 @@ def test_mutation_target_github_automation_policy_uses_explicit_actions(monkeypa assert "cloudtrail:CreateTrail" in actions # nosec B101 assert "sns:CreateTopic" in actions # nosec B101 assert "sqs:CreateQueue" in actions # nosec B101 + assert "secretsmanager:CreateSecret" in actions # nosec B101 + 
assert "secretsmanager:GetSecretValue" not in allow_actions # nosec B101 + assert "secretsmanager:PutSecretValue" not in allow_actions # nosec B101 + assert "secretsmanager:UpdateSecret" not in allow_actions # nosec B101 assert "sqs:ReceiveMessage" not in allow_actions # nosec B101 assert "sqs:DeleteMessage" not in allow_actions # nosec B101 assert "budgets:ModifyBudget" in actions # nosec B101 @@ -307,6 +527,39 @@ def test_mutation_target_github_automation_policy_uses_explicit_actions(monkeypa assert statements["ManageBootstrapSqs"]["Resource"] == [ # nosec B101 "arn:aws:sqs:*:123456789012:bootstrap-test-operations-alerts" ] + assert statements["CreateBootstrapCiSecrets"] == { # nosec B101 + "Sid": "CreateBootstrapCiSecrets", + "Effect": "Allow", + "Action": ["secretsmanager:CreateSecret", "secretsmanager:TagResource"], + "Resource": [ + ( + "arn:aws:secretsmanager:*:123456789012:secret:" + "/bootstrap-infrastructure/ci/test-pr-*" + ), + ( + "arn:aws:secretsmanager:*:123456789012:secret:" + "/bootstrap-infrastructure/ci/test-*" + ), + ], + "Condition": { + "StringEquals": { + "aws:RequestTag/Environment": "test", + "aws:RequestTag/Purpose": "ci-configuration", + } + }, + } + assert ( + statements["ManageBootstrapCiSecrets"]["Resource"] + == ( # nosec B101 + statements["CreateBootstrapCiSecrets"]["Resource"] + ) + ) + assert statements["ManageBootstrapCiSecrets"]["Condition"] == { # nosec B101 + "StringEquals": { + "aws:ResourceTag/Environment": "test", + "aws:ResourceTag/Purpose": "ci-configuration", + } + } assert statements["DenyBootstrapSqsConsumption"] == { # nosec B101 "Sid": "DenyBootstrapSqsConsumption", "Effect": "Deny", diff --git a/tests/unit/test_operations_alert_triage.py b/tests/unit/test_operations_alert_triage.py new file mode 100644 index 0000000..d9b9e4d --- /dev/null +++ b/tests/unit/test_operations_alert_triage.py @@ -0,0 +1,382 @@ +from __future__ import annotations + +import importlib +import json +import sys +from pathlib import Path + 
+SCRIPTS_DIR = Path(__file__).resolve().parents[2] / "scripts" +if str(SCRIPTS_DIR) not in sys.path: + sys.path.insert(0, str(SCRIPTS_DIR)) + +triage = importlib.import_module("operations_alert_triage") + + +def _message( + *, + backup_job_id: str, + message_id: str = "sqs-1", + resource_arn: str = "arn:aws:s3:::example", +) -> dict[str, object]: + event = { + "source": "aws.backup", + "detail-type": "Backup Job State Change", + "time": "2026-05-24T06:08:57Z", + "detail": { + "state": "FAILED", + "backupVaultName": "bootstrap-test", + "backupPlanId": "plan-1", + "backupRuleId": "rule-1", + "resourceArn": resource_arn, + "backupJobId": backup_job_id, + }, + } + sns = { + "MessageId": "sns-1", + "Timestamp": "2026-05-24T06:08:58Z", + "Message": json.dumps(event), + } + return { + "MessageId": message_id, + "Body": json.dumps(sns), + "Attributes": {"SentTimestamp": "1779602937768"}, + } + + +def _event_message( + *, + source: str, + detail_type: str, + detail: dict[str, object], + message_id: str, +) -> dict[str, object]: + event = { + "id": f"event-{message_id}", + "source": source, + "detail-type": detail_type, + "time": "2026-05-24T06:08:57Z", + "detail": detail, + } + sns = { + "MessageId": f"sns-{message_id}", + "Timestamp": "2026-05-24T06:08:58Z", + "Message": json.dumps(event), + } + return { + "MessageId": message_id, + "Body": json.dumps(sns), + "Attributes": {"SentTimestamp": "1779602937768"}, + } + + +def test_alerts_fingerprint_ignores_per_occurrence_ids() -> None: + first = {"Messages": [_message(backup_job_id="job-1", message_id="sqs-1")]} + second = {"Messages": [_message(backup_job_id="job-2", message_id="sqs-2")]} + + assert triage.alerts_fingerprint(first) == triage.alerts_fingerprint(second) + + +def test_alert_groups_split_distinct_streams() -> None: + alerts = { + "Messages": [ + _message(backup_job_id="job-1", resource_arn="arn:aws:s3:::one"), + _message(backup_job_id="job-2", resource_arn="arn:aws:s3:::one"), + _message(backup_job_id="job-3", 
resource_arn="arn:aws:s3:::two"), + ] + } + + groups = triage.grouped_alerts_payload(alerts)["groups"] + + assert sorted(group["messageCount"] for group in groups) == [1, 2] # nosec B101 + assert groups[0]["fingerprint"] != groups[1]["fingerprint"] # nosec B101 + + +def test_non_backup_fingerprint_uses_stable_detail_without_occurrence_ids() -> None: + first = { + "Messages": [ + _event_message( + source="aws.kms", + detail_type="AWS API Call via CloudTrail", + detail={ + "eventSource": "kms.amazonaws.com", + "eventName": "DisableKey", + "requestID": "request-1", + "requestParameters": {"keyId": "arn:aws:kms:::key/one"}, + }, + message_id="sqs-1", + ) + ] + } + same_stream = { + "Messages": [ + _event_message( + source="aws.kms", + detail_type="AWS API Call via CloudTrail", + detail={ + "eventSource": "kms.amazonaws.com", + "eventName": "DisableKey", + "requestID": "request-2", + "requestParameters": {"keyId": "arn:aws:kms:::key/one"}, + }, + message_id="sqs-2", + ) + ] + } + distinct_stream = { + "Messages": [ + _event_message( + source="aws.kms", + detail_type="AWS API Call via CloudTrail", + detail={ + "eventSource": "kms.amazonaws.com", + "eventName": "ScheduleKeyDeletion", + "requestID": "request-3", + "requestParameters": {"keyId": "arn:aws:kms:::key/one"}, + }, + message_id="sqs-3", + ) + ] + } + + assert triage.alerts_fingerprint(first) == triage.alerts_fingerprint(same_stream) + assert triage.alerts_fingerprint(first) != triage.alerts_fingerprint( # nosec B101 + distinct_stream + ) + + +def test_stable_detail_handles_lists_and_unknown_objects() -> None: + class CustomValue: + def __str__(self) -> str: + return "custom-value" + + detail = { + "items": [{"id": "volatile", "name": "kept"}, CustomValue()], + "requestID": "volatile", + } + + assert triage.stable_detail(detail) == { # nosec B101 + "items": [{"name": "kept"}, "custom-value"] + } + + +def test_aggregate_fingerprint_combines_multiple_streams() -> None: + alerts = { + "Messages": [ + 
_message(backup_job_id="job-1", resource_arn="arn:aws:s3:::one"), + _message(backup_job_id="job-2", resource_arn="arn:aws:s3:::two"), + ] + } + + fingerprint = triage.alerts_fingerprint(alerts) + group_fingerprints = [ + group["fingerprint"] + for group in triage.grouped_alerts_payload(alerts)["groups"] + ] + + assert fingerprint not in group_fingerprints # nosec B101 + assert fingerprint != "empty" # nosec B101 + + +def test_load_json_handles_invalid_and_non_object_values() -> None: + assert triage.load_json({"answer": 42}) == {"answer": 42} + assert triage.load_json("") == {} + assert triage.load_json("{not-json") == {} + assert triage.load_json("[1, 2, 3]") == {} + + +def test_safe_value_and_empty_fingerprint_defaults() -> None: + assert triage.safe_value(None) == "unknown" + assert triage.alerts_fingerprint({}) == "empty" + assert triage.alerts_fingerprint({"Messages": "not-a-list"}) == "empty" + + +def test_malformed_messages_are_skipped_safely() -> None: + alerts = { + "Messages": [ + "not-a-message", + None, + { + "MessageId": "sqs-safe", + "Body": "{not-json", + "Attributes": "not-a-dict", + }, + ] + } + fingerprint = triage.alerts_fingerprint(alerts) + + body = triage.render_issue_body( + alerts, + triage.IssueContext( + queue_name="queue", + account_id="123456789012", + region="eu-central-1", + fingerprint=fingerprint, + ), + ) + + assert fingerprint != "empty" + assert "contains 1 message(s)" in body + assert "sqsMessageId: `sqs-safe`" in body + assert "snsMessageId: `unknown`" in body + assert "sentTimestamp: `unknown`" in body + + +def test_render_issue_body_includes_only_sanitized_metadata() -> None: + alerts = {"Messages": [_message(backup_job_id="job-1")]} + fingerprint = triage.alerts_fingerprint(alerts) + + body = triage.render_issue_body( + alerts, + triage.IssueContext( + queue_name="bootstrap-test-operations-alerts", + account_id="123456789012", + region="eu-central-1", + fingerprint=fingerprint, + ), + ) + + assert 
f"operations-alert:fingerprint={fingerprint}" in body + assert "sqsMessageId: `sqs-1`" in body + assert "eventSource: `aws.backup`" in body + assert "Backup Job State Change" in body + assert "backupJobId" not in body + assert "resourceArn" not in body + + +def test_main_writes_body_and_fingerprint_files(tmp_path: Path) -> None: + alerts_path = tmp_path / "alerts.json" + body_path = tmp_path / "body.md" + fingerprint_path = tmp_path / "fingerprint" + groups_path = tmp_path / "groups.json" + alerts_path.write_text( + json.dumps({"Messages": [_message(backup_job_id="job-1")]}), + encoding="utf-8", + ) + + assert ( + triage.main( + [ + "--alerts-json", + str(alerts_path), + "--queue-name", + "queue", + "--account-id", + "123456789012", + "--region", + "eu-central-1", + "--body-file", + str(body_path), + "--fingerprint-file", + str(fingerprint_path), + "--groups-file", + str(groups_path), + ] + ) + == 0 + ) + + assert "operations-alert:fingerprint=" in body_path.read_text(encoding="utf-8") + grouped = json.loads(groups_path.read_text(encoding="utf-8")) + assert ( + fingerprint_path.read_text(encoding="utf-8").strip() + == grouped[ # nosec B101 + "groups" + ][0]["fingerprint"] + ) + assert grouped["groups"][0]["messageCount"] == 1 # nosec B101 + + +def test_main_writes_without_group_manifest(tmp_path: Path) -> None: + alerts_path = tmp_path / "alerts.json" + body_path = tmp_path / "body.md" + fingerprint_path = tmp_path / "fingerprint" + alerts_path.write_text( + json.dumps({"Messages": [_message(backup_job_id="job-1")]}), + encoding="utf-8", + ) + + assert ( + triage.main( + [ + "--alerts-json", + str(alerts_path), + "--queue-name", + "queue", + "--account-id", + "123456789012", + "--region", + "eu-central-1", + "--body-file", + str(body_path), + "--fingerprint-file", + str(fingerprint_path), + ] + ) + == 0 + ) + assert body_path.exists() # nosec B101 + assert fingerprint_path.read_text(encoding="utf-8").strip() # nosec B101 + + +def 
test_main_rejects_invalid_alerts_json(tmp_path: Path, capsys) -> None: + alerts_path = tmp_path / "alerts.json" + body_path = tmp_path / "body.md" + fingerprint_path = tmp_path / "fingerprint" + alerts_path.write_text("[1, 2, 3]", encoding="utf-8") + + assert ( + triage.main( + [ + "--alerts-json", + str(alerts_path), + "--queue-name", + "queue", + "--account-id", + "123456789012", + "--region", + "eu-central-1", + "--body-file", + str(body_path), + "--fingerprint-file", + str(fingerprint_path), + ] + ) + == 1 + ) + + assert str(alerts_path) in capsys.readouterr().err + assert not body_path.exists() + assert not fingerprint_path.exists() + + +def test_main_rejects_malformed_alerts_json(tmp_path: Path, capsys) -> None: + alerts_path = tmp_path / "alerts.json" + body_path = tmp_path / "body.md" + fingerprint_path = tmp_path / "fingerprint" + alerts_path.write_text("{not-json", encoding="utf-8") + + assert ( + triage.main( + [ + "--alerts-json", + str(alerts_path), + "--queue-name", + "queue", + "--account-id", + "123456789012", + "--region", + "eu-central-1", + "--body-file", + str(body_path), + "--fingerprint-file", + str(fingerprint_path), + ] + ) + == 1 + ) + + error = capsys.readouterr().err + assert str(alerts_path) in error + assert "JSON object" in error + assert not body_path.exists() + assert not fingerprint_path.exists() diff --git a/tests/unit/test_script_entrypoints.py b/tests/unit/test_script_entrypoints.py index 3befe39..1bc345c 100644 --- a/tests/unit/test_script_entrypoints.py +++ b/tests/unit/test_script_entrypoints.py @@ -2745,6 +2745,10 @@ def test_render_well_architected_closeout_writes_owner_handoff( "GITHUB_REPOSITORY_CONTROLS_MODE=--verify-only " "make configure-github-repository-controls" in text ) + assert ( # nosec B101 + "`prod` plus `operations-alert-reconcile` environments require an " + "independent reviewer" in text + ) assert ( # nosec B101 "gh variable set DEPENDABOT_EXCEPTION_EVIDENCE " "--repo VilnaCRM-Org/bootstrap-infrastructure " 
@@ -2957,6 +2961,9 @@ def test_configure_github_repository_controls_payloads( "custom_branch_policies": False, }, } + assert module.operations_alert_reconcile_environment_payload( # nosec B101 + 9444106 + ) == module.prod_environment_payload(9444106) monkeypatch.setattr(module, "_main_ruleset", lambda _repo: None) monkeypatch.setattr(module, "_github_user_id", lambda _reviewer: 9444106) @@ -2965,7 +2972,13 @@ def test_configure_github_repository_controls_payloads( ) rendered = json.loads(capsys.readouterr().out) assert rendered["prodEnvironment"]["reviewers"][0]["id"] == 9444106 # nosec B101 + assert rendered["operationsAlertReconcileEnvironment"]["reviewers"][0]["id"] == ( # nosec B101 + 9444106 + ) assert rendered["prodEnvironmentReviewerLogin"] == "Kravalg" # nosec B101 + assert rendered["operationsAlertReconcileEnvironmentReviewerLogin"] == ( # nosec B101 + "Kravalg" + ) assert ( # nosec B101 module.main(["--repo", "VilnaCRM-Org/bootstrap-infrastructure", "--dry-run"]) @@ -2975,7 +2988,14 @@ def test_configure_github_repository_controls_payloads( assert ( # nosec B101 dry_run_rendered["prodEnvironment"]["reviewers"][0]["id"] == 9444106 ) + assert ( # nosec B101 + dry_run_rendered["operationsAlertReconcileEnvironment"]["reviewers"][0]["id"] + == 9444106 + ) assert dry_run_rendered["prodEnvironmentReviewerLogin"] == "Kravalg" # nosec B101 + assert dry_run_rendered["operationsAlertReconcileEnvironmentReviewerLogin"] == ( # nosec B101 + "Kravalg" + ) with pytest.raises(SystemExit): module.main(["--repo", "example/repo", "--apply", "--dry-run"]) @@ -2983,6 +3003,12 @@ def test_configure_github_repository_controls_payloads( module.main(["--repo", "example/repo", "--apply", "--verify-only"]) with pytest.raises(SystemExit): module.main(["--repo", "example/repo", "--dry-run", "--verify-only"]) + with pytest.raises(SystemExit) as help_exit: + module.main(["--help"]) + assert help_exit.value.code == 0 # nosec B101 + help_text = capsys.readouterr().out + assert "protected 
environment payloads" in help_text # nosec B101 + assert "protected environment controls" in help_text # nosec B101 def test_configure_github_repository_controls_verification_helpers( @@ -3017,6 +3043,12 @@ def test_configure_github_repository_controls_verification_helpers( ) == [] ) + assert ( # noqa: SLF001 # nosec B101 + module._operations_alert_reconcile_environment_verification_blockers( + environment, 9444106 + ) + == [] + ) missing_self_review_environment = { "deployment_branch_policy": { "protected_branches": True, @@ -3125,6 +3157,9 @@ def test_configure_github_repository_controls_verification_helpers( assert module._prod_environment_verification_blockers( # noqa: SLF001 # nosec B101 None, 9444106 ) == ["Production environment was not readable after apply."] + assert module._operations_alert_reconcile_environment_verification_blockers( # noqa: SLF001 # nosec B101 + None, 9444106 + ) == ["Operations alert reconcile environment was not readable after apply."] monkeypatch.setattr(module, "_main_ruleset", lambda _repo: ruleset) monkeypatch.setattr( @@ -3138,6 +3173,8 @@ def test_configure_github_repository_controls_verification_helpers( "requiredStatusChecks": sorted(module.REQUIRED_STATUS_CHECKS), "prodReviewerId": 9444106, "prodEnvironment": "prod", + "operationsAlertReconcileReviewerId": 9444106, + "operationsAlertReconcileEnvironment": "operations-alert-reconcile", } monkeypatch.setattr(module, "_main_ruleset", lambda _repo: bad_ruleset) @@ -3152,6 +3189,10 @@ def fail_environment_read(_args, **_kwargs): module._verify_applied_controls("example/repo", 9444106) # noqa: SLF001 combined_error = str(exc_info.value) assert "missing required status checks" in combined_error # nosec B101 + assert "prod environment was not readable" in combined_error # nosec B101 + assert ( # nosec B101 + "operations-alert-reconcile environment was not readable" in combined_error + ) assert "gh: Not Found" in combined_error # nosec B101 monkeypatch.setattr(module, "_main_ruleset", 
lambda _repo: ruleset) @@ -3183,14 +3224,24 @@ def test_configure_github_repository_controls_verify_only( "_verify_applied_controls", lambda repo, reviewer_id: ( verifications.append((repo, reviewer_id)) - or {"prodEnvironment": "prod", "prodReviewerId": reviewer_id} + or { + "prodEnvironment": "prod", + "prodReviewerId": reviewer_id, + "operationsAlertReconcileEnvironment": "operations-alert-reconcile", + "operationsAlertReconcileReviewerId": reviewer_id, + } ), ) module.configure("example/repo", "Kravalg", apply=False, verify_only=True) rendered = json.loads(capsys.readouterr().out) assert rendered == { # nosec B101 - "verification": {"prodEnvironment": "prod", "prodReviewerId": 9444106} + "verification": { + "prodEnvironment": "prod", + "prodReviewerId": 9444106, + "operationsAlertReconcileEnvironment": "operations-alert-reconcile", + "operationsAlertReconcileReviewerId": 9444106, + } } assert ( # nosec B101 @@ -3418,6 +3469,19 @@ def test_configure_github_repository_controls_apply_paths( verifications.append((repo, reviewer_id)) or {"verified": True} ), ) + monkeypatch.setattr( + module, + "prod_environment_payload", + lambda reviewer_id: {"environment": "prod", "reviewerId": reviewer_id}, + ) + monkeypatch.setattr( + module, + "operations_alert_reconcile_environment_payload", + lambda reviewer_id: { + "environment": "operations-alert-reconcile", + "reviewerId": reviewer_id, + }, + ) def fake_run_gh_api(args, *, input_payload=None): calls.append((list(args), dict(input_payload or {}))) @@ -3437,8 +3501,21 @@ def fake_run_gh_api(args, *, input_payload=None): "--method", "PUT", ] + assert calls[2][0] == [ # nosec B101 + "repos/example/repo/environments/operations-alert-reconcile", + "--method", + "PUT", + ] + assert calls[1][1] == {"environment": "prod", "reviewerId": 9444106} # nosec B101 + assert calls[2][1] == { # nosec B101 + "environment": "operations-alert-reconcile", + "reviewerId": 9444106, + } rendered = json.loads(capsys.readouterr().out) - assert 
rendered["prodEnvironment"]["reviewers"][0]["id"] == 9444106 # nosec B101 + assert rendered["prodEnvironment"]["environment"] == "prod" # nosec B101 + assert rendered["operationsAlertReconcileEnvironment"]["environment"] == ( # nosec B101 + "operations-alert-reconcile" + ) assert rendered["verification"] == {"verified": True} # nosec B101 assert verifications == [("example/repo", 9444106)] # nosec B101 @@ -3447,6 +3524,21 @@ def fake_run_gh_api(args, *, input_payload=None): monkeypatch.setattr(module, "_main_ruleset", lambda _repo: None) module.configure("example/repo", "Kravalg", apply=True) assert calls[0][0] == ["repos/example/repo/rulesets", "--method", "POST"] # nosec B101 + assert calls[1][0] == [ # nosec B101 + "repos/example/repo/environments/prod", + "--method", + "PUT", + ] + assert calls[2][0] == [ # nosec B101 + "repos/example/repo/environments/operations-alert-reconcile", + "--method", + "PUT", + ] + assert calls[1][1] == {"environment": "prod", "reviewerId": 9444106} # nosec B101 + assert calls[2][1] == { # nosec B101 + "environment": "operations-alert-reconcile", + "reviewerId": 9444106, + } assert verifications == [("example/repo", 9444106)] # nosec B101 @@ -4571,7 +4663,10 @@ def test_collect_well_architected_evidence_success_path( # noqa: C901 { "id": "production_approval", "status": "passed", - "evidence": ["Protected prod environment verified."], + "evidence": [ + "Protected prod and operations-alert-reconcile " + "environments verified." 
+ ], }, { "id": "quota_headroom", @@ -8522,7 +8617,7 @@ def fake_runner(command, **kwargs): ) -def test_run_up_plan_stack_recovers_from_plan_decrypt( +def test_run_up_plan_stack_rejects_plan_decrypt_without_direct_apply( monkeypatch: pytest.MonkeyPatch, tmp_path: Path, capsys: pytest.CaptureFixture[str] ) -> None: """Known saved-plan decrypt failures should not bypass the saved plan.""" @@ -8619,10 +8714,10 @@ def fake_runner(command, **kwargs): ) -def test_run_up_stack_recovers_from_lock( +def test_run_up_stack_does_not_retry_after_lock( monkeypatch: pytest.MonkeyPatch, tmp_path: Path ) -> None: - """Guarded direct applies should cancel one stale stack lock and retry.""" + """Direct applies should not auto-cancel stack locks and retry.""" module = load_script_module(monkeypatch, "run_pulumi_command") context_dir = tmp_path / "repo" applied: list[list[str]] = [] @@ -8644,7 +8739,7 @@ def fake_runner(command, **kwargs): context = module.CommandContext( root_dir=context_dir, - env={"GITHUB_ACTIONS": "true", "PULUMI_EXPECTED_SHA": "b" * 40}, + env={}, pulumi_dir=context_dir / "pulumi", policy_pack_dir=context_dir / "policy", plan_dir=context_dir / ".artifacts" / "pulumi-plan", @@ -8654,11 +8749,44 @@ def fake_runner(command, **kwargs): runner=fake_runner, ) - assert module._run_up_stack(context, "test") is None - assert any( # nosec B101 + assert module._run_up_stack(context, "test") == 255 + assert not any( # nosec B101 len(command) > 3 and command[3] == "cancel" for command in applied ) - assert up_attempts == 2 # nosec B101 + assert up_attempts == 1 # nosec B101 + + +def test_run_up_stack_rejects_direct_apply_in_github_actions( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, + capsys: pytest.CaptureFixture[str], +) -> None: + """GitHub applies must use a reviewed saved plan.""" + module = load_script_module(monkeypatch, "run_pulumi_command") + context_dir = tmp_path / "repo" + calls: list[list[str]] = [] + + def fake_runner(command, **kwargs): + 
calls.append(command) + return subprocess.CompletedProcess(command, 0, stdout="") + + context = module.CommandContext( + root_dir=context_dir, + env={"GITHUB_ACTIONS": "true", "PULUMI_EXPECTED_SHA": "b" * 40}, + pulumi_dir=context_dir / "pulumi", + policy_pack_dir=context_dir / "policy", + plan_dir=context_dir / ".artifacts" / "pulumi-plan", + preview_artifact_dir=context_dir / ".artifacts" / "pulumi-preview", + backend_url="file:///tmp/backend", + secrets_provider="awskms://alias/example?region=eu-central-1", + runner=fake_runner, + ) + + assert module._run_up_stack(context, "test") == 1 # nosec B101 + assert "direct Pulumi up is disabled in GitHub Actions" in ( # nosec B101 + capsys.readouterr().err + ) + assert calls == [] # nosec B101 def test_run_pulumi_command_observable_output_paths( @@ -8810,17 +8938,15 @@ def ci_success_runner(command, **kwargs): secrets_provider="awskms://alias/example?region=eu-central-1", runner=ci_success_runner, ) - assert module._run_up_stack(ci_success_context, "test") is None - assert any( # nosec B101 - len(command) > 3 and command[3] == "up" for command in ci_success_calls - ) + assert module._run_up_stack(ci_success_context, "test") == 1 # nosec B101 + assert ci_success_calls == [] # nosec B101 def direct_failure_runner(command, **kwargs): return subprocess.CompletedProcess(command, 17, stdout="", stderr="boom") failed_context = module.CommandContext( root_dir=context_dir, - env={"GITHUB_ACTIONS": "true", "PULUMI_EXPECTED_SHA": "f" * 40}, + env={}, pulumi_dir=context_dir / "pulumi", policy_pack_dir=context_dir / "policy", plan_dir=context_dir / ".artifacts" / "pulumi-plan", diff --git a/tests/unit/test_validate_ci_environment.py b/tests/unit/test_validate_ci_environment.py new file mode 100644 index 0000000..6b71f30 --- /dev/null +++ b/tests/unit/test_validate_ci_environment.py @@ -0,0 +1,271 @@ +from __future__ import annotations + +import importlib +import os +import sys +from pathlib import Path + +SCRIPTS_DIR = 
Path(__file__).resolve().parents[2] / "scripts" +if str(SCRIPTS_DIR) not in sys.path: + sys.path.insert(0, str(SCRIPTS_DIR)) + +validator = importlib.import_module("validate_ci_environment") + + +def _valid_environment() -> dict[str, str]: + return { + "AWS_ACCOUNT_ID": "123456789012", + "AWS_REGION": "eu-central-1", + "AWS_PREVIEW_ROLE_ARN": "arn:aws:iam::123456789012:role/Preview", + "AWS_APPLY_ROLE_ARN": "arn:aws:iam::123456789012:role/Apply", + "AWS_DRIFT_ROLE_ARN": "arn:aws:iam::123456789012:role/Drift", + "AWS_OPERATIONS_ALERT_TRIAGE_ROLE_ARN": ( + "arn:aws:iam::123456789012:role/OperationsAlertTriage" + ), + "PULUMI_BACKEND_URL": "s3://pulumi-bootstrap-infrastructure-test/state/test", + "PULUMI_SECRETS_PROVIDER": ( + "awskms://alias/pulumi-platform-bootstrap-test?region=eu-central-1" + ), + "PULUMI_PREVIEW_STACKS": "test,prod", + "PULUMI_DRIFT_STACKS": "test", + "OPERATIONS_TOPIC_ARN": "arn:aws:sns:eu-central-1:123456789012:bootstrap-test", + "OPERATIONS_ALERT_QUEUE_NAME": "bootstrap-test-operations-alerts", + "OPERATIONS_CLOUDTRAIL_NAME": "bootstrap-test-management-events", + } + + +def test_parse_required_keys_strips_blank_items() -> None: + assert validator.parse_required_keys(" AWS_ACCOUNT_ID, ,AWS_REGION ") == ( + "AWS_ACCOUNT_ID", + "AWS_REGION", + ) + + +def test_validate_environment_accepts_aws_secrets_manager_derived_values() -> None: + keys = validator.parse_required_keys( + "AWS_ACCOUNT_ID,AWS_REGION,AWS_PREVIEW_ROLE_ARN," + "PULUMI_BACKEND_URL,PULUMI_SECRETS_PROVIDER,PULUMI_PREVIEW_STACKS" + ) + + assert validator.validate_environment(keys, _valid_environment()) == [] + + +def test_validate_environment_rejects_missing_or_blank_values() -> None: + keys = ("AWS_ACCOUNT_ID", "AWS_REGION") + environment = {"AWS_ACCOUNT_ID": " "} + + issues = validator.validate_environment(keys, environment) + + assert [issue.name for issue in issues] == ["AWS_ACCOUNT_ID", "AWS_REGION"] + assert {issue.message for issue in issues} == {"is required"} + + +def 
test_validate_environment_rejects_unsafe_shapes() -> None: + environment = { + **_valid_environment(), + "AWS_ACCOUNT_ID": "not-account", + "AWS_REGION": "central", + "AWS_PREVIEW_ROLE_ARN": "arn:aws:iam::123456789012:user/not-role", + "PULUMI_BACKEND_URL": "file:///tmp/backend", + "PULUMI_SECRETS_PROVIDER": "passphrase", + "PULUMI_PREVIEW_STACKS": "test,$(secret)", + } + keys = validator.parse_required_keys( + "AWS_ACCOUNT_ID,AWS_REGION,AWS_PREVIEW_ROLE_ARN," + "PULUMI_BACKEND_URL,PULUMI_SECRETS_PROVIDER,PULUMI_PREVIEW_STACKS" + ) + + issues = validator.validate_environment(keys, environment) + + assert {issue.name for issue in issues} == { + "AWS_ACCOUNT_ID", + "AWS_REGION", + "AWS_PREVIEW_ROLE_ARN", + "PULUMI_BACKEND_URL", + "PULUMI_SECRETS_PROVIDER", + "PULUMI_PREVIEW_STACKS", + } + + +def test_validate_environment_accepts_job_specific_and_unknown_keys() -> None: + keys = validator.parse_required_keys( + "OPERATIONS_TOPIC_ARN,OPERATIONS_ALERT_QUEUE_NAME," + "OPERATIONS_CLOUDTRAIL_NAME,UNVALIDATED_METADATA" + ) + environment = {**_valid_environment(), "UNVALIDATED_METADATA": "value"} + + assert validator.validate_environment(keys, environment) == [] + + +def test_validate_environment_rejects_job_specific_shapes() -> None: + environment = { + **_valid_environment(), + "OPERATIONS_TOPIC_ARN": "not-an-arn", + "OPERATIONS_ALERT_QUEUE_NAME": "bad/resource/name", + "OPERATIONS_CLOUDTRAIL_NAME": "", + } + keys = validator.parse_required_keys( + "OPERATIONS_TOPIC_ARN,OPERATIONS_ALERT_QUEUE_NAME,OPERATIONS_CLOUDTRAIL_NAME" + ) + + issues = validator.validate_environment(keys, environment) + + assert {issue.name for issue in issues} == { # nosec B101 + "OPERATIONS_CLOUDTRAIL_NAME" + } + + environment["OPERATIONS_CLOUDTRAIL_NAME"] = "bootstrap-test-management-events" + issues = validator.validate_environment(keys, environment) + + assert {issue.name for issue in issues} == { # nosec B101 + "OPERATIONS_TOPIC_ARN", + "OPERATIONS_ALERT_QUEUE_NAME", + } + + +def 
test_write_github_environment_skips_missing_output_and_region( + tmp_path: Path, +) -> None: + validator.write_github_environment({}, None) + + github_env = tmp_path / "github-env" + validator.write_github_environment({}, str(github_env)) + + assert github_env.read_text(encoding="utf-8") == "" + + +def test_write_github_environment_sets_default_region(tmp_path: Path) -> None: + github_env = tmp_path / "github-env" + + validator.write_github_environment( + {"AWS_REGION": "eu-central-1"}, + str(github_env), + ) + + assert github_env.read_text(encoding="utf-8") == "AWS_DEFAULT_REGION=eu-central-1\n" + + +def test_write_github_environment_rejects_multiline_region(tmp_path: Path) -> None: + github_env = tmp_path / "github-env" + + try: + validator.write_github_environment( + {"AWS_REGION": "eu-central-1\nINJECTED=value"}, + str(github_env), + ) + except ValueError as exc: + assert "newline" in str(exc) # nosec B101 + else: # pragma: no cover + raise AssertionError("expected multiline region rejection") + + assert not github_env.exists() + + +def test_main_reports_errors_without_printing_values(capsys) -> None: + original_environ = dict(os.environ) + try: + os.environ.clear() + os.environ.update({"AWS_ACCOUNT_ID": "not-account"}) + + assert ( + validator.main( + [ + "--purpose", + "unit", + "--required-keys", + "AWS_ACCOUNT_ID,AWS_REGION", + ] + ) + == 1 + ) + finally: + os.environ.clear() + os.environ.update(original_environ) + + output = capsys.readouterr().out + assert "AWS_REGION is required" in output + assert "not-account" not in output + + +def test_main_rejects_empty_required_keys(capsys) -> None: + assert ( + validator.main( + [ + "--purpose", + "unit", + "--required-keys", + " , ", + ] + ) + == 1 + ) + + assert "required-keys must include at least one" in capsys.readouterr().out + + +def test_main_writes_default_region_and_summary(tmp_path: Path, capsys) -> None: + original_environ = dict(os.environ) + github_env = tmp_path / "github-env" + secret_id = 
"/bootstrap-infrastructure/ci/test" + try: + os.environ.clear() + os.environ.update( + { + **_valid_environment(), + "GITHUB_ENV": str(github_env), + "CI_CONFIG_SECRET_ID": secret_id, + } + ) + + assert ( + validator.main( + [ + "--purpose", + "unit", + "--required-keys", + "AWS_ACCOUNT_ID,AWS_REGION,PULUMI_BACKEND_URL," + "PULUMI_SECRETS_PROVIDER", + ] + ) + == 0 + ) + finally: + os.environ.clear() + os.environ.update(original_environ) + + assert github_env.read_text(encoding="utf-8") == "AWS_DEFAULT_REGION=eu-central-1\n" + output = capsys.readouterr().out + assert "using a fixed CI secret" in output + assert secret_id not in output + + +def test_main_rejects_multiline_github_env_write(tmp_path: Path, capsys) -> None: + original_environ = dict(os.environ) + github_env = tmp_path / "github-env" + try: + os.environ.clear() + os.environ.update( + { + **_valid_environment(), + "AWS_REGION": "eu-central-1\nINJECTED=value", + "GITHUB_ENV": str(github_env), + } + ) + + assert ( + validator.main( + [ + "--purpose", + "unit", + "--required-keys", + "AWS_ACCOUNT_ID", + ] + ) + == 1 + ) + finally: + os.environ.clear() + os.environ.update(original_environ) + + assert "AWS_REGION must not contain newline" in capsys.readouterr().out + assert not github_env.exists()