diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3d212fd..27b8c15 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -8,10 +8,10 @@ on: types: [opened, synchronize, reopened] workflow_dispatch: inputs: - run_acceptance: - description: "Run acceptance tests" - type: boolean - default: false + k8s_versions: + description: "Optional explicit Kubernetes versions (comma-separated, e.g. v1.31.0,v1.30.0). Leave blank to use the K8S_MATRIX_VERSIONS repository variable." + required: false + default: "" permissions: contents: read @@ -129,10 +129,70 @@ jobs: echo "✅ All tests completed successfully!" echo "================================" + # --------------------------------------------------------------------------- + # Resolve the Kubernetes versions to test into a JSON array consumed by the + # acceptance-test matrix below. The workflow_dispatch input takes precedence, + # otherwise the repository variable K8S_MATRIX_VERSIONS (ordered kindest/node + # tags) is used. Fails if neither is set. + # --------------------------------------------------------------------------- + acceptance-matrix: + name: Acceptance Matrix + if: | + github.event_name == 'workflow_dispatch' || + (github.event_name == 'pull_request' && + github.base_ref == 'main' && + github.event.pull_request.draft == false) + runs-on: ubuntu-latest + needs: [unit-test, security] + outputs: + versions: ${{ steps.resolve.outputs.versions }} + steps: + - name: Resolve configured Kubernetes versions + id: resolve + env: + INPUT_K8S_VERSIONS: ${{ github.event.inputs.k8s_versions || '' }} + REPO_K8S_MATRIX_VERSIONS: ${{ vars.K8S_MATRIX_VERSIONS }} + run: | + # Configure the K8S_MATRIX_VERSIONS repository variable with ordered + # kindest/node tags. 
Available tags: https://hub.docker.com/r/kindest/node + raw_versions="${INPUT_K8S_VERSIONS:-${REPO_K8S_MATRIX_VERSIONS:-}}" + normalized_versions="$(echo "${raw_versions}" | tr ',' ' ' | xargs)" + if [ -z "${normalized_versions}" ]; then + echo "K8S_MATRIX_VERSIONS repository variable is required when workflow_dispatch input k8s_versions is empty." >&2 + exit 1 + fi + + read -ra versions <<< "${normalized_versions}" + + json="[" + for version in "${versions[@]}"; do + printf -v json '%s"%s",' "${json}" "${version}" + done + json="${json%,}]" + + echo "versions=${json}" >> "${GITHUB_OUTPUT}" + echo "Resolved acceptance matrix: ${json}" + + # --------------------------------------------------------------------------- + # One isolated runner per Kubernetes version. fail-fast: false so a failure in + # one version does not cancel the others. make k8s-matrix-test (driven by + # K8S_VERSIONS) creates a cluster pinned to kindest/node:<version>, generates + # worker.hcl, installs the chart, runs the session test, then tears down + # (including ID-scoped Boundary worker cleanup). 
+ # --------------------------------------------------------------------------- acceptance-test: - name: Acceptance Test + name: Acceptance Test (${{ matrix.k8s_version }}) + if: | + github.event_name == 'workflow_dispatch' || + (github.event_name == 'pull_request' && + github.base_ref == 'main' && + github.event.pull_request.draft == false) runs-on: ubuntu-latest - needs: unit-test + needs: acceptance-matrix + strategy: + fail-fast: false + matrix: + k8s_version: ${{ fromJSON(needs.acceptance-matrix.outputs.versions) }} env: BOUNDARY_ADDR: ${{ secrets.BOUNDARY_ADDR }} BOUNDARY_AUTH_METHOD_ID: ${{ secrets.BOUNDARY_AUTH_METHOD_ID }} @@ -140,6 +200,9 @@ jobs: BOUNDARY_LOGIN_NAME: ${{ secrets.BOUNDARY_LOGIN_NAME }} BOUNDARY_PASSWORD: ${{ secrets.BOUNDARY_PASSWORD }} BOUNDARY_TARGET_ID: ${{ secrets.BOUNDARY_TARGET_ID }} + BOUNDARY_BYOW_IMAGE: ${{ secrets.BOUNDARY_BYOW_IMAGE }} + # Selects the kindest/node image the matrix script pins the cluster to. + K8S_VERSIONS: ${{ matrix.k8s_version }} steps: - name: Checkout code uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -147,14 +210,8 @@ jobs: - name: Setup acceptance environment run: make acceptance-setup - - name: Generate worker config - run: make worker-config - - - name: Install Helm chart and run Helm tests - run: make acceptance-helm - - - name: Run acceptance tests - run: make acceptance-test + - name: Run Kubernetes acceptance test for selected version + run: make k8s-matrix-test - name: Cleanup if: always() diff --git a/Makefile b/Makefile index 103ce7f..7f09b2f 100644 --- a/Makefile +++ b/Makefile @@ -7,13 +7,17 @@ ifneq (,$(wildcard .env)) export endif +# Always export the Kubernetes version matrix selection to recipe sub-shells, +# even when no .env file is present (e.g. `make k8s-matrix-test K8S_MATRIX_VERSIONS=...`). 
+export K8S_MATRIX_VERSIONS + # ================================ # PHONY Declarations # ================================ .PHONY: help format deps clean lint test unit-test worker-config .PHONY: setup-helm setup-kubeconform setup-trivy setup-kubescape setup-helm-unittest lint-helm-k8s trivy-scan kubescape-scan .PHONY: acceptance-setup acceptance-cluster acceptance-helm acceptance-test acceptance-full acceptance-cleanup -.PHONY: kind-matrix-test kind-matrix-cleanup +.PHONY: k8s-matrix-test k8s-matrix-cleanup .PHONY: eks-setup eks-helm eks-test eks-full eks-cleanup .PHONY: tf-setup tf-destroy tf-output tf-plan .PHONY: aks-setup aks-helm aks-test aks-full aks-cleanup @@ -53,8 +57,8 @@ help: @echo " make acceptance-test - Run acceptance tests" @echo " make acceptance-full - Run full acceptance workflow (setup + worker-config + helm + tests)" @echo " make acceptance-cleanup - Delete acceptance cluster" - @echo " make kind-matrix-test - Run tcp-target-conn-test.sh across the 2 KIND versions prior to latest (auto-resolved)" - @echo " make kind-matrix-cleanup - Delete the acceptance cluster and cached KIND binaries" + @echo " make k8s-matrix-test - Run tcp-target-conn-test.sh across kindest/node K8s versions (set K8S_MATRIX_VERSIONS or K8S_VERSIONS)" + @echo " make k8s-matrix-cleanup - Delete the acceptance cluster and generated worker config" @echo "" @echo "AWS EKS Acceptance Testing targets (shell-based, legacy):" @echo " make eks-setup - Provision EKS cluster via Terraform (tf-setup)" @@ -464,7 +468,8 @@ worker-config: fi; \ export ACTIVATION_TOKEN; \ if [ -n "$$WORKER_ID" ]; then \ - echo "✅ Created worker $$WORKER_ID"; \ + echo "$$WORKER_ID" > "$${BOUNDARY_WORKER_ID_FILE:-/tmp/boundary-worker-id.txt}"; \ + echo "✅ Created worker $$WORKER_ID (id saved for ID-scoped verification & cleanup)"; \ fi; \ echo ""; \ echo "Generating worker configuration from template..."; \ @@ -539,11 +544,14 @@ acceptance-test: @bash tests/acceptance/cluster-smoke-test.sh @bash 
tests/acceptance/tcp-target-conn-test.sh @bash tests/acceptance/cleanup-worker.sh - @bash tests/acceptance/kind-version-matrix-test.sh @echo "✅ All acceptance tests passed!" @echo "" +# Note: acceptance-full does NOT run the Kubernetes version matrix test. +# The matrix manages its own cluster lifecycle (it deletes/recreates the +# 'acceptance' cluster per version), so it is kept separate. Run it on its own: +# make k8s-matrix-test K8S_MATRIX_VERSIONS="v1.36.1 v1.35.5" acceptance-full: @echo "================================" @echo "Running Full Acceptance Workflow" @@ -562,32 +570,35 @@ acceptance-full: @echo "" # ================================ -# KIND Version Matrix Testing +# Kubernetes Version Matrix Testing # ================================ -kind-matrix-test: +k8s-matrix-test: @echo "================================" - @echo "KIND Version Matrix Test" - @echo "Versions: resolved dynamically from GitHub Releases" + @echo "Kubernetes Version Matrix Test" @echo "================================" - @chmod +x tests/acceptance/kind-version-matrix-test.sh - @bash tests/acceptance/kind-version-matrix-test.sh + @if [ -z "$(K8S_MATRIX_VERSIONS)" ] && [ -z "$(K8S_VERSIONS)" ]; then \ + echo "❌ Set K8S_MATRIX_VERSIONS (ordered kindest/node tags) or K8S_VERSIONS (one-off override)."; \ + echo " Example: make k8s-matrix-test K8S_MATRIX_VERSIONS=\"v1.31.0 v1.30.0\""; \ + echo " Available tags: https://hub.docker.com/r/kindest/node"; \ + exit 1; \ + fi + @chmod +x tests/acceptance/k8s-version-matrix-test.sh + @K8S_MATRIX_VERSIONS="$(K8S_MATRIX_VERSIONS)" K8S_VERSIONS="$(K8S_VERSIONS)" bash tests/acceptance/k8s-version-matrix-test.sh -kind-matrix-cleanup: +k8s-matrix-cleanup: @echo "================================" - @echo "KIND Matrix Cleanup" + @echo "K8s Matrix Cleanup" @echo "================================" - @find "$${TMPDIR:-/tmp}" -maxdepth 1 -name 'kind-v[0-9]*' 2>/dev/null | while read -r BIN; do \ - if [ -x "$$BIN" ] && "$$BIN" get clusters 2>/dev/null | grep 
-q "^acceptance$$"; then \ - echo "Deleting cluster using $$(basename $$BIN) binary..."; \ - "$$BIN" delete cluster --name acceptance; \ - fi; \ - rm -f "$$BIN"; \ - echo "✅ Removed cached $$(basename $$BIN) binary"; \ - done + @if kind get clusters 2>/dev/null | grep -q "^acceptance$$"; then \ + echo "Deleting acceptance cluster..."; \ + kind delete cluster --name acceptance; \ + else \ + echo "⚠️ Acceptance cluster does not exist"; \ + fi @rm -f worker.hcl @rm -f /tmp/boundary-worker-id.txt - @echo "✅ KIND matrix cleanup complete" + @echo "✅ K8s matrix cleanup complete" acceptance-cleanup: @echo "================================" diff --git a/docs/OPERATIONS.md b/docs/OPERATIONS.md index c973601..30fe3e7 100644 --- a/docs/OPERATIONS.md +++ b/docs/OPERATIONS.md @@ -736,8 +736,9 @@ Operational implications: └── tests/ ├── acceptance/ │ ├── cluster-smoke-test.sh + │ ├── k8s-matrix-config.yaml.tpl + │ ├── k8s-version-matrix-test.sh │ ├── kind-acceptance-config.yaml - │ ├── kind-version-matrix-test.sh │ └── tcp-target-conn-test.sh ├── integration/ │ ├── aks-integration-test.sh @@ -762,7 +763,8 @@ Key files: - `tests/unit/*_test.yaml`: Helm unit tests run with `helm-unittest` - `tests/acceptance/cluster-smoke-test.sh`: validates a KIND cluster is up and accessible - `tests/acceptance/tcp-target-conn-test.sh`: end-to-end session and TCP connection test -- `tests/acceptance/kind-version-matrix-test.sh`: runs `tcp-target-conn-test.sh` across multiple KIND versions +- `tests/acceptance/k8s-version-matrix-test.sh`: runs `tcp-target-conn-test.sh` across multiple Kubernetes versions +- `tests/acceptance/k8s-matrix-config.yaml.tpl`: KIND cluster template rendered per Kubernetes version by the matrix test - `tests/integration/`: EKS and AKS integration tests - `docs/TESTING.md`: full testing guide - `docs/FAQ.md`: frequently asked questions diff --git a/docs/TESTING.md b/docs/TESTING.md index 1ede61c..e63efdb 100644 --- a/docs/TESTING.md +++ b/docs/TESTING.md @@ -19,6 +19,8 @@ 
All tests share these base requirements: - `kubectl` CLI installed - `helm` CLI installed - `boundary` CLI installed +- `curl` installed +- `python3` installed (used to parse Boundary JSON output in the acceptance tests) - KIND for local cluster testing (`brew install kind`) Install all local dependencies at once: @@ -107,6 +109,10 @@ BOUNDARY_CLUSTER_ID="" # Required for TCP target connection test BOUNDARY_TARGET_ID="ttcp_" + +# Optional: default Kubernetes version matrix (space or comma separated kindest/node tags). +# Auto-loaded and exported by the Makefile; override per-run with K8S_VERSIONS. +# K8S_MATRIX_VERSIONS="v1.36.1 v1.35.5 v1.34.8" ``` #### 2. Generate a worker HCL configuration @@ -172,38 +178,69 @@ bash tests/acceptance/tcp-target-conn-test.sh --- -### KIND Version Matrix Test +### Kubernetes Version Matrix Test -Runs `tcp-target-conn-test.sh` across multiple KIND versions for Kubernetes compatibility validation. +Runs `tcp-target-conn-test.sh` across multiple Kubernetes versions to validate the chart against different Kubernetes API-server versions. Each version uses a KIND cluster pinned to the matching `kindest/node` image. ```bash -cd boundary-worker-helm -bash tests/acceptance/kind-version-matrix-test.sh +make k8s-matrix-test K8S_MATRIX_VERSIONS="v1.36.1 v1.35.5 v1.34.8" ``` -Or via `make`: +Test a single version (faster iteration): ```bash -make kind-matrix-test +make k8s-matrix-test K8S_VERSIONS="v1.36.1" ``` -**What it tests:** -- Full TCP target connection test across the two most recent stable KIND releases -- Automatically resolves latest KIND releases from the GitHub Releases API -- Falls back to hardcoded versions when offline (`v0.30.0`, `v0.29.0`) +**Version source:** +- `K8S_MATRIX_VERSIONS` — the configured list of `kindest/node` tags (space or comma separated). +- `K8S_VERSIONS` — a one-off override that takes precedence over `K8S_MATRIX_VERSIONS`. 
-**Duration:** ~15–20 minutes (runs the full test suite twice) +The run fails fast if neither is set. Available tags: https://hub.docker.com/r/kindest/node -**Process:** -1. Resolves two prior stable KIND versions (latest-1 and latest-2) -2. Downloads pinned KIND binaries (cached in `/tmp`) -3. Creates a fresh KIND cluster for each version using `kind-acceptance-config.yaml` +**What it tests:** +- The full TCP target connection test on every configured Kubernetes version +- Independent pass/fail per version (one version failing does not stop the others) + +**Process (per version):** +1. Deletes any leftover `acceptance` cluster +2. Creates a fresh cluster pinned to `kindest/node:<version>`, rendered from `tests/acceptance/k8s-matrix-config.yaml.tpl` +3. Pre-loads the worker image into the node (arch-aware) 4. Generates a new `worker.hcl` via `make worker-config` -5. Installs the Helm chart via `make acceptance-helm` +5. Installs the Helm chart 6. Runs `tcp-target-conn-test.sh` with `TIMEOUT=600` -7. Tears down the cluster -8. Repeats for the next version -9. Prints a per-version pass/fail summary +7. Tears down the cluster and removes the worker registration from Boundary +8. Prints a per-version pass/fail summary at the end + +**Requirements:** +- KIND **v0.32.0+** locally — current node images (e.g. `v1.36.1`) require it; older KIND cannot load them and the worker pod will `ImagePullBackOff`. The `kindest/node` tags must match your installed KIND version (see each KIND release's notes). +- Same `.env` and target requirements as the TCP target connection test. + +**Duration:** ~5–10 minutes per version (run serially on one machine). + +**Configuring the matrix in CI:** + +In GitHub Actions the version list comes from a **repository variable** named `K8S_MATRIX_VERSIONS` (not a secret). The `acceptance-matrix` job hard-fails if it is unset, so you must create it before the workflow can run: + +1. 
Go to **Settings → Secrets and variables → Actions → Variables → New repository variable**. +2. Name: `K8S_MATRIX_VERSIONS` +3. Value: a space- or comma-separated list of `kindest/node` image tags, e.g. `v1.36.1 v1.35.5 v1.34.8`. + +Format and tag rules: +- Each entry is a `kindest/node` tag including the leading `v` (e.g. `v1.36.1`), **not** a bare `1.36`. +- Tags must be published for the KIND version CI installs (CI uses the latest KIND, so use the latest release's tags). Browse valid tags at https://hub.docker.com/r/kindest/node or the KIND release notes. +- For a one-off run you can override the variable with the `workflow_dispatch` **`k8s_versions`** input — it takes precedence over the repository variable for that run only. + +The same `BOUNDARY_*` secrets the TCP target connection test needs must also be configured as Actions secrets, otherwise the per-version jobs fail at worker registration. + +> In CI, these versions run **in parallel** — one job per version — via the `acceptance-matrix` and `acceptance-test` jobs in `.github/workflows/test.yml`, driven by the `K8S_MATRIX_VERSIONS` repository variable (or the `workflow_dispatch` `k8s_versions` input). + +Preview the resolved version list without creating any clusters: + +```bash +PRINT_RESOLVED_K8S_VERSIONS=true K8S_MATRIX_VERSIONS="v1.36.1 v1.35.5" \ + bash tests/acceptance/k8s-version-matrix-test.sh +``` --- @@ -227,17 +264,16 @@ make acceptance-cleanup This performs the following cleanup actions: 1. Deletes the worker registration from the Boundary cluster (prevents worker buildup) -2. Deletes the `acceptance` KIND cluster -3. Uninstalls the Helm release -4. Preserves cached KIND binaries in `/tmp` for faster subsequent test runs +2. Deletes the `acceptance` KIND cluster (removing the Helm release with it) +3. 
Removes the generated `worker.hcl` and the worker-ID file (`/tmp/boundary-worker-id.txt`) -To also remove cached KIND binaries (used by the matrix test): +To clean up after a matrix run: ```bash -make kind-matrix-cleanup +make k8s-matrix-cleanup ``` -This removes all downloaded KIND version binaries from `/tmp/kind-v*`. Use this when you want a completely clean state or to free up disk space. +This deletes the `acceptance` KIND cluster and removes the generated `worker.hcl` and worker-ID file (`/tmp/boundary-worker-id.txt`). --- @@ -334,6 +370,8 @@ The acceptance cluster is defined in `tests/acceptance/kind-acceptance-config.ya - 1 control-plane node, 2 worker nodes - Port 30000 and 30001 mapped from container to host for NodePort services +The Kubernetes version matrix test uses a separate template, `tests/acceptance/k8s-matrix-config.yaml.tpl`, with the same topology but a `__K8S_VERSION__` placeholder that the script substitutes with each `kindest/node` tag. Edit node roles / port mappings there to change the matrix cluster layout. + ### Timeout configuration The `tcp-target-conn-test.sh` script defaults to a 300-second deployment wait timeout. The matrix test overrides this to 600 seconds. You can override it manually: @@ -407,8 +445,8 @@ curl http://localhost:9203/health # Delete the KIND cluster kind delete cluster --name acceptance -# Clean up cached KIND binaries (optional) -rm -f /tmp/kind-v* +# Remove generated worker artifacts (optional) +rm -f worker.hcl /tmp/boundary-worker-id.txt ``` --- @@ -418,6 +456,7 @@ rm -f /tmp/kind-v* The tests are integrated into GitHub Actions workflows: - **PR validation**: `lint-helm-k8s`, unit tests, and Trivy / Kubescape scans run on every pull request +- **Acceptance matrix**: On non-draft PRs to `main` (and via manual dispatch), the worker is deployed and the TCP target connection test runs in parallel across every Kubernetes version in `K8S_MATRIX_VERSIONS` (`acceptance-matrix` / `acceptance-test` jobs in `test.yml`). 
Requires the `K8S_MATRIX_VERSIONS` repository variable and the `BOUNDARY_*` secrets. - **Push validation**: Runs on pushes to main branches - **Release validation**: Runs before creating releases @@ -445,13 +484,14 @@ See `.github/workflows/` for workflow configuration. ## Test Maintenance -### Updating KIND versions +### Updating Kubernetes versions -The matrix test automatically resolves the two latest stable KIND versions. To update fallback versions (used when offline), edit `tests/acceptance/kind-version-matrix-test.sh`: +The matrix test resolves versions from the `K8S_MATRIX_VERSIONS` repository variable (CI) or the `K8S_VERSIONS` / `K8S_MATRIX_VERSIONS` argument (local) — there are no hardcoded Kubernetes versions in the script. To change the tested set: -```bash -_FALLBACK_KIND_VERSIONS=("v0.30.0" "v0.29.0") -``` +- **CI:** update the `K8S_MATRIX_VERSIONS` repository variable (Settings → Secrets and variables → Actions → Variables), e.g. `v1.36.1 v1.35.5 v1.34.8`. +- **Local:** pass them on the command line, e.g. `make k8s-matrix-test K8S_MATRIX_VERSIONS="v1.36.1 v1.35.5"`. + +Use `kindest/node` tags that match your installed KIND version (each KIND release publishes a specific set). Available tags: https://hub.docker.com/r/kindest/node ### Updating test values diff --git a/tests/acceptance/cleanup-worker.sh b/tests/acceptance/cleanup-worker.sh index 203249a..090a07d 100755 --- a/tests/acceptance/cleanup-worker.sh +++ b/tests/acceptance/cleanup-worker.sh @@ -24,7 +24,7 @@ echo "Boundary Worker Cleanup" echo "" # Check if worker ID file exists -WORKER_ID_FILE="/tmp/boundary-worker-id.txt" +WORKER_ID_FILE="${BOUNDARY_WORKER_ID_FILE:-/tmp/boundary-worker-id.txt}" if [ ! 
-f "${WORKER_ID_FILE}" ]; then warn "Worker ID file not found at ${WORKER_ID_FILE}" warn "Worker may not have been registered or test did not complete" diff --git a/tests/acceptance/k8s-matrix-config.yaml.tpl b/tests/acceptance/k8s-matrix-config.yaml.tpl new file mode 100644 index 0000000..79acb37 --- /dev/null +++ b/tests/acceptance/k8s-matrix-config.yaml.tpl @@ -0,0 +1,30 @@ +# Copyright IBM Corp. 2026 +# +# KIND cluster topology for the Kubernetes version matrix test. +# +# k8s-version-matrix-test.sh renders this template once per version, replacing +# the version placeholder below with the requested kindest/node tag +# (e.g. v1.36.1) before creating the cluster. This way the same node layout and +# NodePort mappings are exercised across every Kubernetes version. +# +# Edit node roles / extraPortMappings here — the Kubernetes version is injected +# automatically, so do NOT hardcode a version in this file. +# Available node image tags: https://hub.docker.com/r/kindest/node + +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +name: acceptance +nodes: +- role: control-plane + image: kindest/node:__K8S_VERSION__ + extraPortMappings: + - containerPort: 30000 + hostPort: 30000 + protocol: TCP + - containerPort: 30001 + hostPort: 30001 + protocol: TCP +- role: worker + image: kindest/node:__K8S_VERSION__ +- role: worker + image: kindest/node:__K8S_VERSION__ diff --git a/tests/acceptance/kind-version-matrix-test.sh b/tests/acceptance/k8s-version-matrix-test.sh similarity index 52% rename from tests/acceptance/kind-version-matrix-test.sh rename to tests/acceptance/k8s-version-matrix-test.sh index 4a77d3b..cb279ba 100755 --- a/tests/acceptance/kind-version-matrix-test.sh +++ b/tests/acceptance/k8s-version-matrix-test.sh @@ -1,24 +1,25 @@ #!/bin/bash # Copyright IBM Corp. 
2026 - -# KIND Version Matrix Test — TCP Target Connection -# Dynamically resolves the latest stable KIND release from GitHub, then runs -# tcp-target-conn-test.sh against the two versions immediately preceding it -# (latest-1 and latest-2). Falls back to hardcoded defaults when offline. -# For each version: -# 1. Downloads the pinned KIND binary (cached in /tmp) -# 2. Creates a fresh KIND cluster using kind-acceptance-config.yaml +# SPDX-License-Identifier: MPL-2.0 + +# Kubernetes Version Matrix Test — TCP Target Connection +# Tests tcp-target-conn-test.sh across configured kindest/node Kubernetes versions. +# Available tags reference: https://hub.docker.com/r/kindest/node +# +# For each Kubernetes version: +# 1. Creates a fresh KIND cluster pinned to kindest/node:<version> +# 2. Pre-loads the worker image into the node (arch-aware) # 3. Generates a new worker.hcl via `make worker-config` # 4. Installs the Helm chart # 5. Runs tcp-target-conn-test.sh -# 6. Tears down the cluster +# 6. Runs cleanup-worker.sh and tears down the cluster # Prints a per-version pass/fail summary at the end. set -euo pipefail # -- Helpers -------------------------------------------------------------------- # All helpers write to stderr so they are safe to use inside $() subshells -# (e.g. download_kind) without polluting captured stdout. +# without polluting captured stdout. 
pass() { echo " ✅ $1" >&2; } fail() { echo "❌ FAILED: $1" >&2; exit 1; } info() { echo " $1" >&2; } @@ -28,67 +29,59 @@ header() { echo " $1" >&2 } -# -- Fallback versions (used when GitHub API is unreachable) ------------------- -_FALLBACK_KIND_VERSIONS=("v0.30.0" "v0.29.0") - -# -- resolve_kind_versions ----------------------------------------------------- -# Queries the GitHub Releases API for kubernetes-sigs/kind, sorts stable tags -# by semver descending, and returns the two versions immediately below the -# latest (latest-1 and latest-2) so the matrix always tests the two most -# recently released prior versions without any manual edits. -resolve_kind_versions() { - local raw - raw="$(curl -fsSL --retry 2 --connect-timeout 10 \ - "https://api.github.com/repos/kubernetes-sigs/kind/releases" 2>/dev/null)" || true - - if [ -z "${raw}" ]; then - warn "GitHub Releases API unreachable — using fallback KIND versions: ${_FALLBACK_KIND_VERSIONS[*]}" - echo "${_FALLBACK_KIND_VERSIONS[@]}" - return +# -- k8s_versions: resolve the Kubernetes node versions to test ----------------- +# Priority: +# - K8S_VERSIONS: explicit one-off override (comma or space separated) +# - K8S_MATRIX_VERSIONS: ordered repository-configured list +k8s_versions() { + if [ -n "${K8S_VERSIONS:-}" ]; then + local normalized + normalized="$(echo "${K8S_VERSIONS}" | tr ',' ' ' | xargs)" + local count + count="$(echo "${normalized}" | wc -w | tr -d ' ')" + if [ "${count}" -ge 1 ]; then + info "Using explicit versions from K8S_VERSIONS: ${normalized}" + echo "${normalized}" + return + fi fi - local output - output="$(printf '%s' "${raw}" | python3 -c " -import json, sys -releases = json.load(sys.stdin) -tags = sorted( - [r['tag_name'] for r in releases - if not r.get('prerelease', False) and r.get('tag_name', '').startswith('v')], - key=lambda v: [int(x) for x in v.lstrip('v').split('.')], - reverse=True -) -print(' '.join(tags[1:3])) -" 2>/dev/null)" || true - - local word_count - word_count="$(echo 
"${output}" | wc -w | tr -d ' ')" - if [ -z "${output}" ] || [ "${word_count}" -lt 2 ]; then - warn "Could not parse KIND releases — using fallback: ${_FALLBACK_KIND_VERSIONS[*]}" - echo "${_FALLBACK_KIND_VERSIONS[@]}" - return - fi + local configured="${K8S_MATRIX_VERSIONS:-}" + [ -n "${configured}" ] || fail "Set K8S_MATRIX_VERSIONS or K8S_VERSIONS before running. See https://hub.docker.com/r/kindest/node for available tags." - echo "${output}" + local normalized + normalized="$(echo "${configured}" | tr ',' ' ' | xargs)" + local count + count="$(echo "${normalized}" | wc -w | tr -d ' ')" + [ "${count}" -ge 1 ] || fail "K8S_MATRIX_VERSIONS did not contain any usable versions. See https://hub.docker.com/r/kindest/node for available tags." + + echo "${normalized}" } # -- Configuration -------------------------------------------------------------- -read -ra KIND_VERSIONS <<< "$(resolve_kind_versions)" +# k8s_versions() runs in a command substitution, so its fail() only exits that +# subshell; guard here so an empty result aborts the parent (otherwise the loop +# would run zero times and report a misleading "all passed"). +read -ra MATRIX_K8S_VERSIONS <<< "$(k8s_versions)" +[ "${#MATRIX_K8S_VERSIONS[@]}" -ge 1 ] || exit 1 KIND_CLUSTER_NAME="acceptance" SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" CHART_DIR="$(cd "${SCRIPT_DIR}/../.." && pwd)" -KIND_CONFIG="${SCRIPT_DIR}/kind-acceptance-config.yaml" +KIND_MATRIX_TEMPLATE="${SCRIPT_DIR}/k8s-matrix-config.yaml.tpl" TCP_TEST="${SCRIPT_DIR}/tcp-target-conn-test.sh" -KIND_CACHE_DIR="${TMPDIR:-/tmp}" -# -- OS / Architecture detection ------------------------------------------------ -_OS="$(uname -s | tr '[:upper:]' '[:lower:]')" +# Print the resolved versions and exit (used by tooling / CI to build a matrix). 
+if [ "${PRINT_RESOLVED_K8S_VERSIONS:-false}" = "true" ]; then + echo "${MATRIX_K8S_VERSIONS[*]}" + exit 0 +fi + +# -- Architecture detection (for arch-aware image preload) ---------------------- _ARCH="$(uname -m)" case "${_ARCH}" in x86_64) _ARCH="amd64" ;; arm64|aarch64) _ARCH="arm64" ;; - *) fail "Unsupported architecture: ${_ARCH}" ;; esac -KIND_PLATFORM="${_OS}-${_ARCH}" # -- Load .env (for Boundary credentials / BOUNDARY_* vars) -------------------- if [ -f "${CHART_DIR}/.env" ]; then @@ -100,7 +93,7 @@ fi # -- Pre-flight checks ---------------------------------------------------------- header "Pre-flight Checks" -for cmd in kubectl helm boundary curl python3 docker; do +for cmd in kubectl helm boundary curl docker kind python3; do command -v "${cmd}" >/dev/null 2>&1 \ || fail "'${cmd}' is required but not installed. Run: make acceptance-setup" pass "${cmd} found" @@ -111,7 +104,7 @@ docker info >/dev/null 2>&1 \ || fail "Docker daemon is not running. Start Docker Desktop and retry." pass "Docker daemon is running" -[ -f "${KIND_CONFIG}" ] || fail "Kind config not found: ${KIND_CONFIG}" +[ -f "${KIND_MATRIX_TEMPLATE}" ] || fail "Kind matrix template not found: ${KIND_MATRIX_TEMPLATE}" [ -f "${TCP_TEST}" ] || fail "TCP test not found: ${TCP_TEST}" pass "Test scripts present" @@ -127,30 +120,25 @@ echo "" declare -A RESULTS declare -A RESULT_NOTES -# -- download_kind: fetch a pinned KIND binary, cache it in /tmp ---------------- -download_kind() { - local version="$1" - local bin_path="${KIND_CACHE_DIR}/kind-${version}" - - if [ -x "${bin_path}" ]; then - info "Using cached KIND ${version} at ${bin_path}" - else - info "Downloading KIND ${version} for ${KIND_PLATFORM}..." 
- curl -fsSL \ - "https://kind.sigs.k8s.io/dl/${version}/kind-${KIND_PLATFORM}" \ - -o "${bin_path}" - chmod +x "${bin_path}" - pass "Downloaded KIND ${version}" +# -- Crash-safe cleanup -------------------------------------------------------- +# CURRENT_VERSION is set just before a cluster is created and cleared once that +# version has been fully torn down, so the EXIT trap only acts when the run is +# interrupted mid-version (Ctrl-C, error, or CI cancellation). +CURRENT_VERSION="" +_matrix_cleanup() { + if [ -n "${CURRENT_VERSION:-}" ]; then + echo "" >&2 + warn "Interrupted — tearing down KIND cluster '${KIND_CLUSTER_NAME}'..." + kind delete cluster --name "${KIND_CLUSTER_NAME}" >/dev/null 2>&1 || true fi - - echo "${bin_path}" + [ -n "${CHART_DIR:-}" ] && rm -f "${CHART_DIR}/worker.hcl" 2>/dev/null || true } +trap _matrix_cleanup EXIT # -- preload_worker_image: pull image locally then load into KIND node --------- # This avoids a cold registry pull on a fresh node, which is the main cause # of the deployment-readiness timeout in matrix runs. preload_worker_image() { - local kind_bin="$1" # Use BOUNDARY_BYOW_IMAGE if set (enterprise/custom image), else chart default local image="${BOUNDARY_BYOW_IMAGE:-hashicorp/boundary-enterprise:0.21-ent}" info "Pre-loading worker image into KIND cluster: ${image}" @@ -189,7 +177,7 @@ EOF fi # Load from local daemon into the KIND node - if ! "${kind_bin}" load docker-image "${image}" \ + if ! 
kind load docker-image "${image}" \ --name "${KIND_CLUSTER_NAME}" >/dev/null 2>&1; then warn "kind load docker-image failed — pod will pull from registry (may be slow)" return 0 @@ -199,14 +187,27 @@ EOF # -- cleanup_cluster: delete the acceptance cluster if it exists --------------- cleanup_cluster() { - local kind_bin="$1" - if "${kind_bin}" get clusters 2>/dev/null | grep -q "^${KIND_CLUSTER_NAME}$"; then + if kind get clusters 2>/dev/null | grep -q "^${KIND_CLUSTER_NAME}$"; then info "Deleting existing KIND cluster '${KIND_CLUSTER_NAME}'..." - "${kind_bin}" delete cluster --name "${KIND_CLUSTER_NAME}" >/dev/null 2>&1 + kind delete cluster --name "${KIND_CLUSTER_NAME}" >/dev/null 2>&1 pass "Cluster '${KIND_CLUSTER_NAME}' deleted" fi } +# -- create_kind_config_for_k8s: pin every node to kindest/node:<version> ------ +# Renders k8s-matrix-config.yaml.tpl, substituting the __K8S_VERSION__ +# placeholder with the requested tag. The node layout and NodePort mappings live +# in that template file so they can be maintained without editing this script. +create_kind_config_for_k8s() { + local k8s_version="$1" + [ -f "${KIND_MATRIX_TEMPLATE}" ] || fail "Kind matrix template not found: ${KIND_MATRIX_TEMPLATE}" + local cfg + cfg="$(mktemp)" || fail "Failed to create temp kind config" + sed "s|__K8S_VERSION__|${k8s_version}|g" "${KIND_MATRIX_TEMPLATE}" > "${cfg}" \ + || fail "Failed to render kind config for ${k8s_version}" + echo "${cfg}" +} + # -- generate_worker_config: create a fresh worker.hcl activation token -------- generate_worker_config() { info "Generating fresh worker.hcl (new activation token)..." 
@@ -276,116 +277,133 @@ install_helm_chart() { } # -- Main matrix loop ----------------------------------------------------------- -header "KIND Version Matrix Test — TCP Target Connection" -echo " Platform : ${KIND_PLATFORM}" -echo " Versions : ${KIND_VERSIONS[*]}" -echo " Chart dir : ${CHART_DIR}" - -for VERSION in "${KIND_VERSIONS[@]}"; do +header "Kubernetes Version Matrix Test — TCP Target Connection" +echo " Versions : ${MATRIX_K8S_VERSIONS[*]}" >&2 +echo " Chart dir : ${CHART_DIR}" >&2 - header "Testing with KIND ${VERSION}" - echo " --------------------------------" >&2 +for VERSION in "${MATRIX_K8S_VERSIONS[@]}"; do - # 1. Download pinned KIND binary - KIND_BIN="$(download_kind "${VERSION}")" - - # 2. Confirm binary reports the expected version - DETECTED="$("${KIND_BIN}" version 2>&1)" - info "Binary reports: ${DETECTED}" - echo "" + header "Testing with Kubernetes ${VERSION}" + info "Using node image: kindest/node:${VERSION}" + echo "" >&2 - # 3. Remove any leftover cluster from a previous run - cleanup_cluster "${KIND_BIN}" + # Arm the EXIT trap for this version so an interrupt mid-setup still tears + # the cluster down. + CURRENT_VERSION="${VERSION}" - # 4. Create a fresh cluster with this KIND version - info "Creating KIND cluster '${KIND_CLUSTER_NAME}' using KIND ${VERSION}..." - CREATE_OUT=$(mktemp) - if ! "${KIND_BIN}" create cluster \ - --name "${KIND_CLUSTER_NAME}" \ - --config "${KIND_CONFIG}" >${CREATE_OUT} 2>&1; then + # --- Per-version setup (steps 1-5) ----------------------------------------- + # Run setup in a subshell so a failure (cluster create / helm install) + # records FAIL for this version and continues to the next, instead of + # aborting the whole matrix via fail() -> exit 1. + set +e + ( + set -euo pipefail + + # 1. Remove any leftover cluster from a previous run + cleanup_cluster + + # 2. 
Create a fresh cluster pinned to this Kubernetes version + local_kind_cfg="$(create_kind_config_for_k8s "${VERSION}")" + info "Creating KIND cluster '${KIND_CLUSTER_NAME}' using kindest/node:${VERSION}..." + CREATE_OUT=$(mktemp) || fail "Failed to create temp file for cluster creation output" + if ! kind create cluster \ + --name "${KIND_CLUSTER_NAME}" \ + --config "${local_kind_cfg}" >"${CREATE_OUT}" 2>&1; then + echo "" >&2 + echo "❌ kind create cluster failed. Output:" >&2 + cat "${CREATE_OUT}" >&2 + rm -f "${local_kind_cfg}" "${CREATE_OUT}" + fail "KIND cluster creation failed for Kubernetes ${VERSION}" + fi + rm -f "${local_kind_cfg}" "${CREATE_OUT}" + pass "Cluster '${KIND_CLUSTER_NAME}' created with Kubernetes ${VERSION}" echo "" >&2 - echo "❌ kind create cluster failed. Output:" >&2 - cat "${CREATE_OUT}" >&2 - rm -f "${CREATE_OUT}" - fail "KIND cluster creation failed for ${VERSION}" - fi - rm -f "${CREATE_OUT}" - pass "Cluster '${KIND_CLUSTER_NAME}' created with KIND ${VERSION}" - echo "" - - # 5. Pre-load the worker image into the KIND node to avoid cold registry pull - preload_worker_image "${KIND_BIN}" - echo "" - # 6. Generate a new worker activation token / worker.hcl - generate_worker_config - echo "" + # 3. Pre-load the worker image into the KIND node to avoid cold registry pull + preload_worker_image + echo "" >&2 - # 7. Install the Helm chart - install_helm_chart - echo "" + # 4. Generate a new worker activation token / worker.hcl + generate_worker_config + echo "" >&2 - # 8. Run the TCP target connection test with an extended timeout. - # TIMEOUT=600 gives 10 min — enough for image load + Boundary registration. - # tcp-target-conn-test.sh honours TIMEOUT env var (falls back to 300). - info "Running tcp-target-conn-test.sh for KIND ${VERSION} (TIMEOUT=600s)..." - echo "" - set +e - TIMEOUT=600 bash "${TCP_TEST}" - TCP_EXIT=$? + # 5. Install the Helm chart + install_helm_chart + echo "" >&2 + ) + SETUP_EXIT=$? 
set -e - if [ "${TCP_EXIT}" -eq 0 ]; then - RESULTS["${VERSION}"]="PASS" - RESULT_NOTES["${VERSION}"]="" - pass "KIND ${VERSION}: TCP target connection test PASSED" - else + if [ "${SETUP_EXIT}" -ne 0 ]; then RESULTS["${VERSION}"]="FAIL" - RESULT_NOTES["${VERSION}"]="tcp-target-conn-test.sh exited with code ${TCP_EXIT}" - warn "KIND ${VERSION}: TCP target connection test FAILED (exit code ${TCP_EXIT})" + RESULT_NOTES["${VERSION}"]="setup failed (exit code ${SETUP_EXIT})" + warn "Kubernetes ${VERSION}: setup FAILED (exit code ${SETUP_EXIT}) — skipping TCP test" + else + # 6. Run the TCP target connection test with an extended timeout. + # TIMEOUT=600 gives 10 min — enough for image load + Boundary registration. + # tcp-target-conn-test.sh honours TIMEOUT env var (falls back to 300). + info "Running tcp-target-conn-test.sh for Kubernetes ${VERSION} (TIMEOUT=600s)..." + echo "" + set +e + ( cd "${CHART_DIR}" && TIMEOUT=600 bash "${TCP_TEST}" ) + TCP_EXIT=$? + set -e + + if [ "${TCP_EXIT}" -eq 0 ]; then + RESULTS["${VERSION}"]="PASS" + RESULT_NOTES["${VERSION}"]="" + pass "Kubernetes ${VERSION}: TCP target connection test PASSED" + else + RESULTS["${VERSION}"]="FAIL" + RESULT_NOTES["${VERSION}"]="tcp-target-conn-test.sh exited with code ${TCP_EXIT}" + warn "Kubernetes ${VERSION}: TCP target connection test FAILED (exit code ${TCP_EXIT})" + fi fi - # 9. Tear down the cluster + # 7. Tear down the cluster echo "" - info "Tearing down cluster for KIND ${VERSION}..." - cleanup_cluster "${KIND_BIN}" + info "Tearing down cluster for Kubernetes ${VERSION}..." + cleanup_cluster - # 10. Run Boundary-side cleanup to remove worker registration(s) + # 8. Run Boundary-side cleanup to remove worker registration(s) # Run cleanup script from chart dir so it can load .env if [ -x "${SCRIPT_DIR}/cleanup-worker.sh" ]; then info "Running Boundary worker cleanup script..." 
- (cd "${CHART_DIR}" && bash "${SCRIPT_DIR}/cleanup-worker.sh") || warn "cleanup-worker.sh failed for KIND ${VERSION}" + (cd "${CHART_DIR}" && bash "${SCRIPT_DIR}/cleanup-worker.sh") || warn "cleanup-worker.sh failed for Kubernetes ${VERSION}" else warn "cleanup-worker.sh not found or not executable at ${SCRIPT_DIR}" fi info "Removing worker.hcl..." rm -f "${CHART_DIR}/worker.hcl" - pass "Cleanup complete for KIND ${VERSION}" + + # This version is fully cleaned up — disarm the EXIT trap for it. + CURRENT_VERSION="" + pass "Cleanup complete for Kubernetes ${VERSION}" done # -- Summary -------------------------------------------------------------------- header "Matrix Test Summary" -printf " %-14s %-8s %s\n" "KIND Version" "Result" "Notes" -printf " %-14s %-8s %s\n" "--------------" "--------" "-----" +printf " %-16s %-8s %s\n" "K8s Version" "Result" "Notes" >&2 +printf " %-16s %-8s %s\n" "----------------" "--------" "-----" >&2 OVERALL_PASS=true -for VERSION in "${KIND_VERSIONS[@]}"; do +for VERSION in "${MATRIX_K8S_VERSIONS[@]}"; do RESULT="${RESULTS[${VERSION}]:-SKIP}" NOTE="${RESULT_NOTES[${VERSION}]:-}" if [ "${RESULT}" = "PASS" ]; then - printf " %-14s ✅ %-8s %s\n" "${VERSION}" "PASS" "${NOTE}" + printf " %-16s ✅ %-8s %s\n" "${VERSION}" "PASS" "${NOTE}" >&2 else - printf " %-14s ❌ %-8s %s\n" "${VERSION}" "${RESULT}" "${NOTE}" + printf " %-16s ❌ %-8s %s\n" "${VERSION}" "${RESULT}" "${NOTE}" >&2 OVERALL_PASS=false fi done -echo "" +echo "" >&2 if [ "${OVERALL_PASS}" = "true" ]; then - pass "All KIND versions passed the TCP target connection test!" + pass "All Kubernetes versions passed the TCP target connection test!" exit 0 else - fail "One or more KIND versions failed — see summary above." + fail "One or more Kubernetes versions failed — see summary above." 
fi diff --git a/tests/acceptance/tcp-target-conn-test.sh b/tests/acceptance/tcp-target-conn-test.sh index 7299617..bbb9934 100755 --- a/tests/acceptance/tcp-target-conn-test.sh +++ b/tests/acceptance/tcp-target-conn-test.sh @@ -43,7 +43,7 @@ trap _cleanup EXIT CONTEXT="kind-acceptance" NAMESPACE="boundary" DEPLOY="boundary-worker-deployment" -# Allow callers (e.g. kind-version-matrix-test.sh) to override the timeout. +# Allow callers (e.g. k8s-version-matrix-test.sh) to override the timeout. # Default is 300s for standalone runs; the matrix test exports TIMEOUT=600. TIMEOUT="${TIMEOUT:-300}" @@ -163,28 +163,56 @@ fi echo "" # ── Boundary API: confirm worker record exists (activation token consumed) ──── +# Prefer the exact worker ID recorded by `make worker-config` (ID-scoped) so +# that concurrent runs sharing one Boundary cluster never pick up each other's +# worker. Fall back to a tag-based lookup when the ID file is absent. +WORKER_ID_FILE="${BOUNDARY_WORKER_ID_FILE:-/tmp/boundary-worker-id.txt}" +EXPECTED_WORKER_ID="" +[ -f "${WORKER_ID_FILE}" ] && EXPECTED_WORKER_ID="$(tr -d '[:space:]' < "${WORKER_ID_FILE}" 2>/dev/null || true)" +[ -n "${EXPECTED_WORKER_ID}" ] && info "Expecting worker ID: ${EXPECTED_WORKER_ID}" + info "Verifying worker record exists in Boundary..." 
-WORKERS_JSON=$(boundary workers list \ - -scope-id global \ - -addr "${BOUNDARY_ADDR}" \ - -token env://BOUNDARY_TOKEN \ - -format json 2>&1) || fail "Failed to list workers from Boundary:\n${WORKERS_JSON}" +WORKER_ID="" +for ((attempt=1; attempt<=20; attempt++)); do + WORKERS_JSON=$(boundary workers list \ + -scope-id global \ + -addr "${BOUNDARY_ADDR}" \ + -token env://BOUNDARY_TOKEN \ + -format json 2>&1) || fail "Failed to list workers from Boundary:\n${WORKERS_JSON}" -WORKER_ID=$(printf '%s\n' "${WORKERS_JSON}" | python3 -c " -import json, sys + WORKER_ID=$(printf '%s\n' "${WORKERS_JSON}" \ + | EXPECTED_WORKER_ID="${EXPECTED_WORKER_ID}" python3 -c " +import json, os, sys +expected = os.environ.get('EXPECTED_WORKER_ID', '').strip() data = json.load(sys.stdin) -for w in data.get('items', []): +items = data.get('items', []) +# 1. ID-scoped match: the exact worker created for THIS run, once connected. +if expected: + for w in items: + if w.get('id') == expected and w.get('address'): + print(w.get('id', '')); sys.exit(0) + sys.exit(0) +# 2. Fallback: first connected worker carrying the 'worker' tag. +for w in items: tags = w.get('canonical_tags', {}).get('type', []) if 'worker' in tags and w.get('address'): - print(w.get('id', '')) - break + print(w.get('id', '')); break " 2>/dev/null || true) -[ -n "${WORKER_ID}" ] || fail "No worker with 'worker' tag found in Boundary. Activation token may not have been consumed." + [ -n "${WORKER_ID}" ] && break + sleep 3 +done + +if [ -z "${WORKER_ID}" ]; then + if [ -n "${EXPECTED_WORKER_ID}" ]; then + fail "Worker ${EXPECTED_WORKER_ID} did not connect to Boundary in time. Activation token may not have been consumed." + fi + fail "No worker with 'worker' tag found in Boundary. Activation token may not have been consumed." +fi pass "Worker record exists in Boundary: ${WORKER_ID}" -# Save WORKER_ID for cleanup -echo "${WORKER_ID}" > /tmp/boundary-worker-id.txt +# Persist the confirmed worker ID for ID-scoped cleanup. 
+echo "${WORKER_ID}" > "${WORKER_ID_FILE}" echo "" # ── Log: confirm worker is reaching upstream ──────────────────────────────────