From 0edf27cb3fbcf40719abb7a7fcf27c9fed6b22ba Mon Sep 17 00:00:00 2001
From: Ravi Shankar <ravish@nvidia.com>
Date: Thu, 25 Jun 2026 20:49:24 -0700
Subject: [PATCH] test(e2e): e2e k8s environment tests using chainsaw

Signed-off-by: Ravi Shankar <ravish@nvidia.com>
---
 .claude/CLAUDE.md                             |  16 +
 .github/workflows/e2e.yml                     | 100 ++++++
 AGENTS.md                                     |  16 +
 Makefile                                      |  47 ++-
 docs/engines/k8s.md                           |  46 +++
 tests/chainsaw/README.md                      | 149 +++++++++
 tests/chainsaw/chainsaw-config.yaml           |  14 +
 .../k8s/label-application/chainsaw-test.yaml  | 156 ++++++++++
 .../k8s/label-application/values.yaml         |  29 ++
 .../k8s/label-truncation/chainsaw-test.yaml   | 153 ++++++++++
 .../chainsaw/k8s/label-truncation/values.yaml |  29 ++
 tests/chainsaw/kind-config.yaml               |   8 +
 .../block-complement/chainsaw-test.yaml       | 252 +++++++++++++++
 .../slinky/block-complement/values.yaml       |  41 +++
 .../slinky/dra-provider/chainsaw-test.yaml    | 226 ++++++++++++++
 .../chainsaw/slinky/dra-provider/values.yaml  |  25 ++
 .../slinky/dynamic-nodes/chainsaw-test.yaml   | 288 ++++++++++++++++++
 .../chainsaw/slinky/dynamic-nodes/values.yaml |  43 +++
 .../slinky/tree-topology/chainsaw-test.yaml   | 123 ++++++++
 .../chainsaw/slinky/tree-topology/values.yaml |  37 +++
 20 files changed, 1796 insertions(+), 2 deletions(-)
 create mode 100644 .github/workflows/e2e.yml
 create mode 100644 tests/chainsaw/README.md
 create mode 100644 tests/chainsaw/chainsaw-config.yaml
 create mode 100644 tests/chainsaw/k8s/label-application/chainsaw-test.yaml
 create mode 100644 tests/chainsaw/k8s/label-application/values.yaml
 create mode 100644 tests/chainsaw/k8s/label-truncation/chainsaw-test.yaml
 create mode 100644 tests/chainsaw/k8s/label-truncation/values.yaml
 create mode 100644 tests/chainsaw/kind-config.yaml
 create mode 100644 tests/chainsaw/slinky/block-complement/chainsaw-test.yaml
 create mode 100644 tests/chainsaw/slinky/block-complement/values.yaml
 create mode 100644 tests/chainsaw/slinky/dra-provider/chainsaw-test.yaml
 create mode 100644 tests/chainsaw/slinky/dra-provider/values.yaml
 create mode 100644 tests/chainsaw/slinky/dynamic-nodes/chainsaw-test.yaml
 create mode 100644 tests/chainsaw/slinky/dynamic-nodes/values.yaml
 create mode 100644 tests/chainsaw/slinky/tree-topology/chainsaw-test.yaml
 create mode 100644 tests/chainsaw/slinky/tree-topology/values.yaml
diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md
index 9b39401b..9bc1fb00 100644
--- a/.claude/CLAUDE.md
+++ b/.claude/CLAUDE.md
@@ -42,6 +42,7 @@ internal/             # Shared utilities not part of the public API
 charts/topograph/     # Helm chart (with node-data-broker subchart); tests/ holds the helm-unittest suites + snapshots
 docs/                 # Public-facing docs — overview.md, architecture.md, api.md + providers/, engines/, reference/ subdirectories
 tests/models/         # YAML simulation fixtures
+tests/chainsaw/       # Chainsaw E2E test suites (k8s/label-application, k8s/label-truncation, slinky/*)
 config/               # Sample topograph-config.yaml
 scripts/              # Build scripts (deb, rpm, SSL, clean)
 localdev/             # Developer-local workspace — not tracked; personal scratch files
@@ -94,6 +95,20 @@ make coverage   # human-readable per-package summary
 
 Run `make qualify` before pushing. The individual targets are available if you want to run a single check during iteration. Run `make chart-test` when you change `charts/topograph/` or its subcharts; CI runs it on every workflow trigger.
 
+### E2E tests (Chainsaw)
+
+Chainsaw conformance tests live in `tests/chainsaw/` and exercise the full Helm deploy → generate → assert cycle against a real cluster.
+
+```bash
+make e2e-local                              # build image, create kind cluster, run all suites, delete cluster
+make kind-load KIND_CLUSTER=<name>          # load image into an existing kind cluster (run before make e2e)
+make e2e                                    # run suites against current KUBECONFIG context
+```
+
+`make e2e` uses `E2E_IMAGE_TAG` (defaults to the short commit SHA) as the image tag. For a local kind cluster, run `make image-build && make kind-load KIND_CLUSTER=<name>` before each `make e2e` — the tag changes with every commit, so both steps are needed after any new commit. Prerequisites: `chainsaw`, `kind`, `helm`, `kubectl`, `docker`. See `tests/chainsaw/README.md` for details.
+
+These tests are triggered manually via `.github/workflows/e2e.yml` (`workflow_dispatch`). Run them before merging changes to the Helm chart, Node Observer, or engine output.
+
 ### Coverage policy
 
 From `codecov.yml`:
@@ -109,6 +124,7 @@ Coverage checks run on pull requests. A drop below target with no matching uplif
 - `.github/workflows/docker.yml` — container image build (manual trigger)
 - `.github/workflows/docker-ib.yml` — InfiniBand-variant container (manual trigger)
 - `.github/workflows/helm-release.yaml` — Helm chart release (manual trigger)
+- `.github/workflows/e2e.yml` — Chainsaw E2E suite against a kind cluster (manual trigger via `workflow_dispatch`)
 
 ### Deployment surfaces
 
diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
new file mode 100644
index 00000000..0eb89641
--- /dev/null
+++ b/.github/workflows/e2e.yml
@@ -0,0 +1,100 @@
+# Copyright 2026 NVIDIA CORPORATION
+# SPDX-License-Identifier: Apache-2.0
+
+name: E2E
+
+on:
+  workflow_dispatch:
+    inputs:
+      chainsaw_version:
+        description: "Chainsaw version to install (e.g. v0.2.12)"
+        required: false
+        default: "latest"
+
+env:
+  KIND_CLUSTER: topograph-e2e
+  IMAGE_REPO: ghcr.io/nvidia/topograph
+  CHAINSAW_VERSION: ${{ github.event.inputs.chainsaw_version || 'latest' }}
+
+jobs:
+  e2e:
+    name: Chainsaw E2E
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+
+    steps:
+    - uses: actions/checkout@v5
+
+    - name: Set up Go
+      uses: actions/setup-go@v6
+      with:
+        go-version: '1.25.9'
+
+    - name: Install kind
+      run: go install sigs.k8s.io/kind@latest
+
+    - name: Install Chainsaw
+      run: |
+        if [ "$CHAINSAW_VERSION" = "latest" ]; then
+          TAG=$(curl -fsSL https://api.github.com/repos/kyverno/chainsaw/releases/latest \
+            | grep '"tag_name"' | cut -d'"' -f4)
+        else
+          TAG="$CHAINSAW_VERSION"
+        fi
+        echo "Installing Chainsaw ${TAG:?could not resolve a Chainsaw release tag}"
+        BASE_URL="https://github.com/kyverno/chainsaw/releases/download/${TAG}"
+        # Keep the upstream asset name: sha256sum -c opens the file named in the checksum list.
+        curl -fsSL -O "${BASE_URL}/chainsaw_linux_amd64.tar.gz" -O "${BASE_URL}/chainsaw_checksums.txt"
+        grep -E '[[:space:]]\*?chainsaw_linux_amd64\.tar\.gz$' chainsaw_checksums.txt | sha256sum -c -
+        tar -xzf chainsaw_linux_amd64.tar.gz chainsaw
+        sudo mv chainsaw /usr/local/bin/
+        rm -f chainsaw_linux_amd64.tar.gz chainsaw_checksums.txt
+        chainsaw version
+
+    - name: Create kind cluster
+      run: |
+        kind create cluster \
+          --name "$KIND_CLUSTER" \
+          --config tests/chainsaw/kind-config.yaml \
+          --wait 120s
+
+    - name: Build Linux/amd64 image
+      run: make build-linux-amd64
+
+    - name: Build container image
+      env:
+        GOOS: linux
+        GOARCH: amd64
+      run: |
+        # Use the short commit SHA as the image tag: always a valid Docker tag,
+        # works regardless of branch naming conventions.
+        IMAGE_TAG=$(git rev-parse --short HEAD)
+        make image-build IMAGE_TAG="$IMAGE_TAG"
+        echo "IMAGE_TAG=$IMAGE_TAG" >> "$GITHUB_ENV"
+
+    - name: Load image into kind
+      run: |
+        kind load docker-image "${IMAGE_REPO}:${IMAGE_TAG}" \
+          --name "$KIND_CLUSTER"
+
+    - name: Run E2E tests
+      env:
+        TOPOGRAPH_IMAGE_REPO: ${{ env.IMAGE_REPO }}
+        TOPOGRAPH_IMAGE_PULL_POLICY: Never
+      run: |
+        make e2e E2E_IMAGE_TAG="$IMAGE_TAG"
+
+    - name: Collect diagnostic logs on failure
+      if: failure()
+      run: |
+        echo "=== kind nodes ==="
+        kubectl get nodes -o wide
+        echo "=== all pods ==="
+        kubectl get pods -A -o wide
+        echo "=== recent events ==="
+        kubectl get events -A --sort-by='.lastTimestamp' | tail -50
+
+    - name: Delete kind cluster
+      if: always()
+      run: kind delete cluster --name "$KIND_CLUSTER"
diff --git a/AGENTS.md b/AGENTS.md
index 43270e0b..c5cef641 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -42,6 +42,7 @@ internal/             # Shared utilities not part of the public API
 charts/topograph/     # Helm chart (with node-data-broker subchart); tests/ holds the helm-unittest suites + snapshots
 docs/                 # Public-facing docs — overview.md, architecture.md, api.md + providers/, engines/, reference/ subdirectories
 tests/models/         # YAML simulation fixtures
+tests/chainsaw/       # Chainsaw E2E test suites (k8s/label-application, k8s/label-truncation, slinky/*)
 config/               # Sample topograph-config.yaml
 scripts/              # Build scripts (deb, rpm, SSL, clean)
 localdev/             # Developer-local workspace — not tracked; personal scratch files
@@ -94,6 +95,20 @@ make coverage   # human-readable per-package summary
 
 Run `make qualify` before pushing. The individual targets are available if you want to run a single check during iteration. Run `make chart-test` when you change `charts/topograph/` or its subcharts; CI runs it on every workflow trigger.
 
+### E2E tests (Chainsaw)
+
+Chainsaw conformance tests live in `tests/chainsaw/` and exercise the full Helm deploy → generate → assert cycle against a real cluster.
+
+```bash
+make e2e-local                              # build image, create kind cluster, run all suites, delete cluster
+make kind-load KIND_CLUSTER=<name>          # load image into an existing kind cluster (run before make e2e)
+make e2e                                    # run suites against current KUBECONFIG context
+```
+
+`make e2e` uses `E2E_IMAGE_TAG` (defaults to the short commit SHA) as the image tag. For a local kind cluster, run `make image-build && make kind-load KIND_CLUSTER=<name>` before each `make e2e` — the tag changes with every commit, so both steps are needed after any new commit. Prerequisites: `chainsaw`, `kind`, `helm`, `kubectl`, `docker`. See `tests/chainsaw/README.md` for details.
+
+These tests are triggered manually via `.github/workflows/e2e.yml` (`workflow_dispatch`). Run them before merging changes to the Helm chart, Node Observer, or engine output.
+
 ### Coverage policy
 
 From `codecov.yml`:
@@ -109,6 +124,7 @@ Coverage checks run on pull requests. A drop below target with no matching uplif
 - `.github/workflows/docker.yml` — container image build (manual trigger)
 - `.github/workflows/docker-ib.yml` — InfiniBand-variant container (manual trigger)
 - `.github/workflows/helm-release.yaml` — Helm chart release (manual trigger)
+- `.github/workflows/e2e.yml` — Chainsaw E2E suite against a kind cluster (manual trigger via `workflow_dispatch`)
 
 ### Deployment surfaces
 
diff --git a/Makefile b/Makefile
index a0cf09c1..0196ff1e 100644
--- a/Makefile
+++ b/Makefile
@@ -23,7 +23,7 @@ OUTPUT_DIR := ./bin
 
 IMAGE_REPO ?=ghcr.io/nvidia/topograph
 GIT_REF ?=$(shell git rev-parse --abbrev-ref HEAD)
-IMAGE_TAG ?=$(GIT_REF)
+IMAGE_TAG ?=$(shell git rev-parse --short HEAD)
 
 .PHONY: build
 build:
@@ -102,7 +102,7 @@ coverage: test
 
 .PHONY: image-build
 image-build:
-	$(DOCKER_BIN) build --build-arg TARGETOS=$(GOOS) --build-arg TARGETARCH=$(GOARCH) -t $(IMAGE_REPO):$(IMAGE_TAG) -f ./Dockerfile .
+	$(DOCKER_BIN) build --build-arg TARGETOS=linux --build-arg TARGETARCH=$(GOARCH) -t $(IMAGE_REPO):$(IMAGE_TAG) -f ./Dockerfile .
 
 .PHONY: image-push
 image-push: image-build
@@ -115,6 +115,49 @@ docker-buildx:
 	$(DOCKER_BIN) buildx build --platform $(PLATFORMS) -t $(IMAGE_REPO):$(IMAGE_TAG) -f ./Dockerfile --push .
 	- $(DOCKER_BIN) buildx rm topograph-builder
 
+CHAINSAW_BIN ?= chainsaw
+KIND_CLUSTER  ?= topograph-e2e
+E2E_IMAGE_TAG ?= $(IMAGE_TAG)
+
+# Check that chainsaw is installed; print install hint if not (command -v is the POSIX lookup, unlike which).
+.PHONY: chainsaw-install
+chainsaw-install:
+	@command -v $(CHAINSAW_BIN) >/dev/null 2>&1 || \
+	  (echo "chainsaw not found — install from https://kyverno.github.io/chainsaw/latest/quick-start/install/"; exit 1)
+
+# Load the locally-built image into an existing kind cluster with the correct
+# E2E_IMAGE_TAG.  Use this before running make e2e against a local kind cluster:
+#   make kind-load KIND_CLUSTER=topograph-test && make e2e
+.PHONY: kind-load
+kind-load:
+	kind load docker-image $(IMAGE_REPO):$(E2E_IMAGE_TAG) --name $(KIND_CLUSTER)
+
+# Run all Chainsaw E2E suites against the current KUBECONFIG context. --config is explicit because Chainsaw only auto-loads ./.chainsaw.yaml.
+# For a pre-pushed registry image: override IMAGE_REPO and E2E_IMAGE_TAG (the recipe derives TOPOGRAPH_IMAGE_REPO/TAG from them).
+# For a local kind cluster: run "make kind-load KIND_CLUSTER=<cluster>" first.
+.PHONY: e2e
+e2e: chainsaw-install
+	TOPOGRAPH_IMAGE_REPO=$(IMAGE_REPO) \
+	TOPOGRAPH_IMAGE_TAG=$(E2E_IMAGE_TAG) \
+	$(CHAINSAW_BIN) test --config tests/chainsaw/chainsaw-config.yaml --test-dir tests/chainsaw
+
+# Build the image, create a 4-worker kind cluster, load the image, run all Chainsaw suites, and destroy the cluster.  Requires kind and chainsaw.
+# "kind export kubeconfig" selects the cluster context; KUBECONFIG must be a file path, not the YAML that "kind get kubeconfig" prints.
+.PHONY: e2e-local
+e2e-local: chainsaw-install image-build
+	kind create cluster --name $(KIND_CLUSTER) \
+	  --config tests/chainsaw/kind-config.yaml --wait 120s \
+	  || kind get clusters | grep -q "^$(KIND_CLUSTER)$$"
+	kind load docker-image $(IMAGE_REPO):$(E2E_IMAGE_TAG) --name $(KIND_CLUSTER)
+	kind export kubeconfig --name $(KIND_CLUSTER) && \
+	TOPOGRAPH_IMAGE_REPO=$(IMAGE_REPO) \
+	TOPOGRAPH_IMAGE_TAG=$(E2E_IMAGE_TAG) \
+	TOPOGRAPH_IMAGE_PULL_POLICY=Never \
+	$(CHAINSAW_BIN) test --config tests/chainsaw/chainsaw-config.yaml --test-dir tests/chainsaw; \
+	E2E_STATUS=$$?; \
+	kind delete cluster --name $(KIND_CLUSTER); \
+	exit $$E2E_STATUS
+
 .PHONY: ssl
 ssl:
 	SSL_DIR=ssl ./scripts/configure-ssl.sh
diff --git a/docs/engines/k8s.md b/docs/engines/k8s.md
index dc8a1915..e509d8c5 100644
--- a/docs/engines/k8s.md
+++ b/docs/engines/k8s.md
@@ -344,6 +344,52 @@ tests:
   enabled: false
 ```
 
+### Conformance testing with Chainsaw
+
+`helm test` verifies that a deployed instance is healthy. To verify that the engine actually **applies correct topology labels** to nodes, use the Chainsaw E2E suite in `tests/chainsaw/`.
+
+[Chainsaw](https://kyverno.github.io/chainsaw/) is Kyverno's declarative E2E framework. Each suite drives `apply → wait → assert → cleanup` against a real cluster using the built-in **test provider** — no cloud credentials required.
+
+#### Test suites
+
+| Suite | What it checks |
+|---|---|
+| `k8s/label-application` | `leaf` and `spine` labels applied to nodes after generation |
+| `k8s/label-truncation` | Switch names >63 chars replaced with an FNV64a hash (valid label value) |
+| `slinky/tree-topology` | Slinky engine writes correct `topology.conf` (tree topology) into a ConfigMap |
+| `slinky/dra-provider` | DRA provider discovers NVLink clique topology; Slinky engine writes correct `topology.conf` (block topology) into a ConfigMap |
+
+#### How suites map topology to nodes
+
+Each test-provider suite embeds its topology model inline in a
+`topology-test-model` ConfigMap. The k8s suites create fake K8s Node objects
+whose names match the node IDs in that model and mount the ConfigMap at
+`/etc/topograph/models/` inside the pod. The suite's `values.yaml` sets the
+provider's `modelFileName` to the mounted file. No node annotations are required.
+
+#### Running locally
+
+```bash
+# Prerequisites: chainsaw, kind, helm, kubectl, docker
+
+# Full lifecycle — build, create cluster, run all suites, delete cluster:
+make e2e-local
+
+# Against an existing local kind cluster (repeat after each commit):
+make image-build                             # rebuild with the current commit SHA tag
+make kind-load KIND_CLUSTER=<cluster-name>   # load into the cluster
+make e2e
+
+# Single suite only:
+chainsaw test --test-dir tests/chainsaw/k8s/label-application
+```
+
+See `tests/chainsaw/README.md` for the full suite list (including `slinky/block-complement` and `slinky/dynamic-nodes`), prerequisites, and environment variable reference.
+
+#### Running in CI
+
+The `.github/workflows/e2e.yml` workflow runs on `workflow_dispatch`. Trigger it manually from the GitHub UI before merging changes to the Helm chart, Node Observer, or engine output code paths.
+
 ### Chart README
 
 For installation, prerequisites, values reference, and configuration examples, see [`charts/topograph/README.md`](../../charts/topograph/README.md) — also surfaced via `helm show readme topograph/topograph`.
diff --git a/tests/chainsaw/README.md b/tests/chainsaw/README.md
new file mode 100644
index 00000000..bb3701bc
--- /dev/null
+++ b/tests/chainsaw/README.md
@@ -0,0 +1,149 @@
+# Topograph Chainsaw E2E Tests
+
+End-to-end conformance tests for the Kubernetes and Slinky engines using
+[Chainsaw](https://kyverno.github.io/chainsaw/) — Kyverno's declarative
+`apply → wait → assert → cleanup` framework.
+
+## How the tests work
+
+All suites follow the same high-level cycle: prepare cluster state → install
+Topograph → assert outputs → clean up. There are three preparation patterns.
+
+### Pattern A — Test provider with fake nodes
+
+Used by: `k8s/label-application`, `k8s/label-truncation`, `slinky/block-complement`,
+`slinky/dynamic-nodes`
+
+1. Creates a `topology-test-model` ConfigMap with the topology model embedded inline,
+   mounted at `/etc/topograph/models/` in the Topograph pod.
+2. Creates fake K8s Node objects carrying `kubernetes.io/os=linux` so the Node Observer
+   fires on them. In the k8s-engine suites the node names match the node IDs in the model
+   (`node-01`, `node-02`), so no instance/region annotations are needed.
+3. Slinky block-topology suites additionally create one fake slurmd pod per fake node
+   (status-patched to `Ready`) so the Slinky engine can build its
+   k8s-node→SLURM-hostname map for writing the ConfigMap and annotating nodes.
+4. Installs the Topograph Helm chart with the Node Observer enabled. The observer
+   fires on the fake nodes on startup, auto-triggering `/v1/generate` — no manual
+   HTTP POST needed.
+5. Asserts that the expected node labels (k8s engine) or ConfigMap content (Slinky
+   engine) appear.
+6. Cleans up (uninstalls the chart, deletes the fake nodes and namespace).
+
+### Pattern B — DRA provider with fake nodes
+
+Used by: `slinky/dra-provider`
+
+1. Creates fake K8s nodes carrying `nvidia.com/gpu.clique`,
+   `topograph.nvidia.com/instance`, and `topograph.nvidia.com/region`
+   labels/annotations. The DRA provider reads NVLink clique topology directly from
+   the K8s API — no model ConfigMap is needed.
+2. Creates one fake slurmd pod per fake node (status-patched to `Ready`) so the
+   Slinky engine can build its k8s-node→SLURM-hostname map.
+3. Installs the Helm chart with the Node Observer enabled; the observer fires on the
+   fake nodes and auto-triggers generation.
+4. Asserts the `slurm-topology` ConfigMap contains the correct block topology entries.
+5. Cleans up.
+
+### Pattern C — Test provider with real cluster nodes
+
+Used by: `slinky/tree-topology`
+
+1. Creates a `topology-test-model` ConfigMap with the model embedded inline.
+2. Installs the Helm chart with the Node Observer watching all `kubernetes.io/os=linux`
+   nodes. Kind worker nodes already carry this label, so generation is triggered
+   immediately on startup — no fake nodes are created.
+3. Topology entries are derived entirely from the model's switch structure; no slurmd
+   pods are needed.
+4. Asserts the `slurm-topology` ConfigMap contains the correct tree-format entries.
+5. Cleans up.
+
+## Test suites
+
+| Suite | Topology source | What it checks |
+|---|---|---|
+| `k8s/label-application` | Test provider — inline model `s1→{s2,s3}`, nodes `node-01` (under s2) and `node-02` (under s3); two fake K8s nodes | `leaf`, `spine` labels applied correctly to fake nodes |
+| `k8s/label-truncation` | Test provider — inline model `s1→AVERYLONGSWITCHNAMETHATEXCEEDSSIXTYCHARACTERSFORTESTINGPURPOSES01→node-01`; one fake K8s node | Switch names >63 chars are replaced with an FNV64a hash prefixed with `x` |
+| `slinky/tree-topology` | Test provider — inline model `S1→{S2,S3}`, nodes `node-01` and `node-02`; fires on real kind worker nodes (no fake nodes) | Slinky engine writes correct `topology.conf` (tree format) into a ConfigMap |
+| `slinky/dra-provider` | DRA provider — `nvidia.com/gpu.clique` labels on four fake nodes (clique-1: node-01/node-02, clique-2: node-03/node-04); fake slurmd pods | DRA provider discovers NVLink clique topology from node labels; Slinky engine writes correct `topology.conf` (block topology) into a ConfigMap |
+| `slinky/block-complement` | Test provider — inline model: spine→{leaf-1,leaf-2,leaf-3}, three NVLink cliques with node-02 (clique-1) and node-05 (clique-3) absent; four fake K8s nodes and fake slurmd pods | Slinky engine pads the block tree with an empty `BlockName=block004` placeholder when BlockSizes=2,4,8 and only 3 of 4 base-block slots are filled; absent nodes are not emitted in their BlockName line |
+| `slinky/dynamic-nodes` | Test provider — same three-clique model as `block-complement` (node-02/05 absent); four fake K8s nodes and fake slurmd pods; `useDynamicNodes: true`, `configUpdateMode: skeleton-only` | Slinky engine writes all `BlockName` lines without `Nodes=` (skeleton format) and `performReconciliation` annotates each K8s node with `topology.slinky.slurm.net/spec` pointing to its assigned block |
+
+## Prerequisites
+
+| Tool | Install |
+|---|---|
+| `chainsaw` | `brew install kyverno/tap/chainsaw` or see [docs](https://kyverno.github.io/chainsaw/latest/quick-start/install/) |
+| `kind` | `brew install kind` |
+| `helm` | `brew install helm` |
+| `kubectl` | `brew install kubectl` |
+| `docker` | [Docker Desktop](https://www.docker.com/products/docker-desktop/) |
+
+## Quick start — local kind cluster
+
+```bash
+# Build image, create cluster, run all suites, delete cluster
+make e2e-local
+```
+
+`make e2e-local` runs in sequence:
+1. `make image-build` — builds the container image for `linux/<host-arch>`
+2. `kind create cluster` — spins up a 4-worker kind cluster (`tests/chainsaw/kind-config.yaml`)
+3. `kind load docker-image` — loads the local image into the cluster (the suites then run with `imagePullPolicy: Never`)
+4. `chainsaw test` — runs all suites
+5. `kind delete cluster` — tears down the cluster
+
+## Running against an existing kind cluster
+
+If you already have a kind cluster and want to run the tests without tearing it
+down, the three-step sequence is:
+
+```bash
+make image-build                             # 1. build the image (tagged with the current commit SHA)
+make kind-load KIND_CLUSTER=<cluster-name>   # 2. load that image into the cluster
+make e2e                                     # 3. run all suites
+```
+
+`IMAGE_TAG` defaults to `$(git rev-parse --short HEAD)`. Because it is tied to
+the commit SHA, you must rebuild and reload whenever you commit new changes —
+otherwise the cluster has a stale image or the tag does not exist at all.
+
+To use a fixed tag instead of the SHA:
+
+```bash
+make image-build IMAGE_TAG=my-tag
+make kind-load KIND_CLUSTER=<cluster-name> E2E_IMAGE_TAG=my-tag
+make e2e E2E_IMAGE_TAG=my-tag
+```
+
+## Running against a non-kind cluster
+
+For a cluster where the image is already in a reachable registry, pass the
+repo and tag as Make variable overrides (not shell env vars — the Makefile
+uses `IMAGE_REPO` and `E2E_IMAGE_TAG`, not `TOPOGRAPH_IMAGE_REPO`/`TOPOGRAPH_IMAGE_TAG`):
+
+```bash
+make e2e IMAGE_REPO=my-registry/topograph E2E_IMAGE_TAG=my-tag
+```
+
+## Running a single suite
+
+```bash
+chainsaw test --test-dir tests/chainsaw/k8s/label-application
+```
+
+To pass a specific image:
+
+```bash
+TOPOGRAPH_IMAGE_REPO=ghcr.io/nvidia/topograph \
+TOPOGRAPH_IMAGE_TAG=my-tag \
+chainsaw test --test-dir tests/chainsaw/k8s/label-application
+```
+
+## Environment variables
+
+| Variable | Default | Purpose |
+|---|---|---|
+| `TOPOGRAPH_IMAGE_REPO` | `ghcr.io/nvidia/topograph` | Image repository |
+| `TOPOGRAPH_IMAGE_TAG` | _empty_ (chart `appVersion`) | Image tag passed directly to test scripts |
+| `E2E_IMAGE_TAG` | short commit SHA (`git rev-parse --short HEAD`) | Tag used by `make e2e` / `make e2e-local` / `make kind-load` |
+| `TOPOGRAPH_IMAGE_PULL_POLICY` | `IfNotPresent` | Set to `Never` for kind (done automatically by `make e2e-local`) |
diff --git a/tests/chainsaw/chainsaw-config.yaml b/tests/chainsaw/chainsaw-config.yaml
new file mode 100644
index 00000000..2a98e46a
--- /dev/null
+++ b/tests/chainsaw/chainsaw-config.yaml
@@ -0,0 +1,14 @@
+apiVersion: chainsaw.kyverno.io/v1alpha1
+kind: Configuration
+metadata:
+  name: chainsaw
+spec:  # NOTE(review): Chainsaw auto-loads only ./.chainsaw.yaml; this file must be passed with --config to take effect
+  timeouts:  # default per-operation timeouts; some suite steps set their own `timeout`
+    apply: 120s
+    assert: 90s
+    cleanup: 60s
+    delete: 60s
+    error: 30s
+    exec: 120s
+  fullName: true
+  parallel: 1  # presumably one test at a time; the suites reuse cluster-scoped names such as node-01
diff --git a/tests/chainsaw/k8s/label-application/chainsaw-test.yaml b/tests/chainsaw/k8s/label-application/chainsaw-test.yaml
new file mode 100644
index 00000000..13ecb56e
--- /dev/null
+++ b/tests/chainsaw/k8s/label-application/chainsaw-test.yaml
@@ -0,0 +1,156 @@
+# Copyright 2026 NVIDIA CORPORATION
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: chainsaw.kyverno.io/v1alpha1
+kind: Test
+metadata:
+  name: k8s-label-application
+spec:
+  description: >
+    Verify that the Kubernetes engine applies the correct topology labels to
+    cluster nodes. The test uses a static topology model (s1→s2/s3, node-01
+    under s2 and node-02 under s3) embedded inline in a ConfigMap and mounted
+    into the pod. Fake Node objects matching those instance IDs are created so
+    the engine can label them. After triggering generation the test asserts
+    leaf/spine labels on node-01 (s2) and node-02 (s3).
+  concurrent: false
+  steps:
+
+  - name: prepare
+    description: >
+      Create fake K8s Node objects (node-01, node-02) matching the model
+      instance IDs, then create the topology-model ConfigMap with inline content.
+    try:
+    - apply:
+        resource:
+          apiVersion: v1
+          kind: Node
+          metadata:
+            name: node-01  # matches a node ID in the inline model below
+            labels:
+              kubernetes.io/os: linux  # same key/value as node-observer trigger.nodeSelector in values.yaml
+    - apply:
+        resource:
+          apiVersion: v1
+          kind: Node
+          metadata:
+            name: node-02  # matches a node ID in the inline model below
+            labels:
+              kubernetes.io/os: linux  # same key/value as node-observer trigger.nodeSelector in values.yaml
+    - apply:
+        resource:
+          apiVersion: v1
+          kind: ConfigMap
+          metadata:
+            name: topology-test-model  # referenced by the volumes entry in values.yaml
+          data:  # the key below becomes the file name under the /etc/topograph/models mount
+            topology-model.yaml: |
+              switches:
+                s1:
+                  switches:
+                  - s2
+                  - s3
+                s2:
+                  nodes:
+                  - node-01
+                s3:
+                  nodes:
+                  - node-02
+              nodes:
+                node-01: {}
+                node-02: {}
+
+
+  - name: install-topograph
+    description: >
+      Install Topograph together with the Node Observer. The observer fires
+      "Added" events for the fake nodes on startup, automatically triggering
+      topology generation via the modelFileName configured in values.yaml.
+    try:
+    - script:
+        timeout: 120s
+        content: |
+          set -euo pipefail
+          REPO_ROOT=$(git rev-parse --show-toplevel)
+          IMAGE_REPO="${TOPOGRAPH_IMAGE_REPO:-ghcr.io/nvidia/topograph}"
+          IMAGE_TAG="${TOPOGRAPH_IMAGE_TAG:-}"
+          PULL_POLICY="${TOPOGRAPH_IMAGE_PULL_POLICY:-IfNotPresent}"
+
+          kubectl delete clusterrole topograph topograph-node-observer \
+            --ignore-not-found 2>/dev/null || true
+          kubectl delete clusterrolebinding topograph topograph-node-observer \
+            --ignore-not-found 2>/dev/null || true
+
+          helm upgrade --install topograph "${REPO_ROOT}/charts/topograph" \
+            --namespace "$NAMESPACE" --create-namespace \
+            --values "$(pwd)/values.yaml" \
+            --set "image.repository=${IMAGE_REPO}" \
+            ${IMAGE_TAG:+--set "image.tag=${IMAGE_TAG}"} \
+            --set "image.pullPolicy=${PULL_POLICY}" \
+            --set "node-observer.image.repository=${IMAGE_REPO}" \
+            ${IMAGE_TAG:+--set "node-observer.image.tag=${IMAGE_TAG}"} \
+            --set "node-observer.image.pullPolicy=${PULL_POLICY}" \
+            --wait --timeout 90s
+    catch:
+    - description: Pod status on install failure
+      script:
+        content: |
+          kubectl get pods -n "$NAMESPACE" -o wide
+          kubectl describe pods -n "$NAMESPACE"
+
+  - name: assert
+    description: >
+      The Node Observer fires "Added" events for the fake nodes on startup,
+      triggering generation automatically. Wait for topology labels to appear:
+      node-01 → leaf=s2 spine=s1, node-02 → leaf=s3 spine=s1.
+    try:
+    - assert:
+        timeout: 60s
+        resource:
+          apiVersion: v1
+          kind: Node
+          metadata:
+            name: node-01
+            labels:
+              network.topology.nvidia.com/leaf: s2
+              network.topology.nvidia.com/spine: s1
+    - assert:
+        timeout: 60s
+        resource:
+          apiVersion: v1
+          kind: Node
+          metadata:
+            name: node-02
+            labels:
+              network.topology.nvidia.com/leaf: s3
+              network.topology.nvidia.com/spine: s1
+    catch:
+    - description: Topograph and Node Observer logs on assertion failure
+      script:
+        content: |
+          echo "=== Topograph logs ==="
+          kubectl logs -n "$NAMESPACE" deploy/topograph --tail=50 || true
+          echo "=== Node Observer logs ==="
+          kubectl logs -n "$NAMESPACE" deploy/topograph-node-observer --tail=50 || true
+          echo "=== Node labels ==="
+          kubectl get nodes node-01 node-02 \
+            -o custom-columns='NAME:.metadata.name,LEAF:.metadata.labels.network\.topology\.nvidia\.com/leaf,SPINE:.metadata.labels.network\.topology\.nvidia\.com/spine' || true
+    finally:
+    - script:
+        timeout: 120s
+        content: |
+          helm uninstall topograph -n "$NAMESPACE" --wait --timeout 60s || true
+          kubectl delete clusterrole topograph topograph-node-observer \
+            --ignore-not-found 2>/dev/null || true
+          kubectl delete clusterrolebinding topograph topograph-node-observer \
+            --ignore-not-found 2>/dev/null || true
+          kubectl delete namespace "$NAMESPACE" --timeout=60s || true
+    - delete:
+        ref:
+          apiVersion: v1
+          kind: Node
+          name: node-01
+    - delete:
+        ref:
+          apiVersion: v1
+          kind: Node
+          name: node-02
diff --git a/tests/chainsaw/k8s/label-application/values.yaml b/tests/chainsaw/k8s/label-application/values.yaml
new file mode 100644
index 00000000..eea13501
--- /dev/null
+++ b/tests/chainsaw/k8s/label-application/values.yaml
@@ -0,0 +1,29 @@
+global:
+  provider:
+    name: test
+    params:
+      modelFileName: /etc/topograph/models/topology-model.yaml  # mountPath below + the key of the topology-test-model ConfigMap
+  engine:
+    name: k8s
+
+config:
+  requestAggregationDelay: 1s
+
+node-observer:
+  topograph:
+    trigger:
+      nodeSelector:
+        kubernetes.io/os: linux  # label carried by the fake nodes created in chainsaw-test.yaml
+
+node-data-broker:
+  enabled: false
+
+# Test-specific values: mount the topology-test-model ConfigMap (created in the prepare step) into the Topograph pod.
+volumes:
+- name: topology-test-model
+  configMap:
+    name: topology-test-model
+
+volumeMounts:
+- name: topology-test-model
+  mountPath: /etc/topograph/models
diff --git a/tests/chainsaw/k8s/label-truncation/chainsaw-test.yaml b/tests/chainsaw/k8s/label-truncation/chainsaw-test.yaml
new file mode 100644
index 00000000..acdaf061
--- /dev/null
+++ b/tests/chainsaw/k8s/label-truncation/chainsaw-test.yaml
@@ -0,0 +1,153 @@
+# Copyright 2026 NVIDIA CORPORATION
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: chainsaw.kyverno.io/v1alpha1
+kind: Test
+metadata:
+  name: k8s-label-truncation
+spec:
+  description: >
+    Verify that switch names exceeding 63 characters (the Kubernetes label-value
+    limit) are replaced with an FNV64a hash prefixed with "x". The test uses
+    a static topology model with a 65-char leaf switch name, embedded inline in
+    a ConfigMap and mounted into the pod. A fake Node object (node-01) is
+    created to receive the label. After generation the leaf label must be ≤63
+    chars and start with "x".
+  concurrent: false
+  steps:
+
+  - name: prepare
+    description: >
+      Create a fake K8s Node object (node-01) matching the model instance ID,
+      then create the topology-model ConfigMap with inline content.
+    try:
+    - apply:  # fake Node named after the node ID listed in the model below
+        resource:
+          apiVersion: v1
+          kind: Node
+          metadata:
+            name: node-01
+            labels:
+              kubernetes.io/os: linux  # matches the node-observer trigger nodeSelector in values.yaml
+    - apply:  # model file read through global.provider.params.modelFileName
+        resource:
+          apiVersion: v1
+          kind: ConfigMap
+          metadata:
+            name: topology-test-model  # must equal the configMap name under `volumes` in values.yaml
+          data:
+            topology-model.yaml: |
+              switches:
+                s1:
+                  switches:
+                  - AVERYLONGSWITCHNAMETHATEXCEEDSSIXTYCHARACTERSFORTESTINGPURPOSES01
+                AVERYLONGSWITCHNAMETHATEXCEEDSSIXTYCHARACTERSFORTESTINGPURPOSES01:
+                  nodes:
+                  - node-01
+              nodes:
+                node-01: {}
+
+  - name: install-topograph
+    description: >
+      Install Topograph together with the Node Observer. The observer fires
+      "Added" events for the fake node on startup, automatically triggering
+      topology generation via the modelFileName configured in values.yaml.
+    try:
+    - script:
+        timeout: 120s  # leaves headroom over helm's own `--wait --timeout 90s` below
+        content: |  # removes existing topograph ClusterRole/ClusterRoleBinding objects, then installs the chart from the repo checkout
+          set -euo pipefail
+          REPO_ROOT=$(git rev-parse --show-toplevel)
+          IMAGE_REPO="${TOPOGRAPH_IMAGE_REPO:-ghcr.io/nvidia/topograph}"
+          IMAGE_TAG="${TOPOGRAPH_IMAGE_TAG:-}"
+          PULL_POLICY="${TOPOGRAPH_IMAGE_PULL_POLICY:-IfNotPresent}"
+
+          kubectl delete clusterrole topograph topograph-node-observer \
+            --ignore-not-found 2>/dev/null || true
+          kubectl delete clusterrolebinding topograph topograph-node-observer \
+            --ignore-not-found 2>/dev/null || true
+
+          helm upgrade --install topograph "${REPO_ROOT}/charts/topograph" \
+            --namespace "$NAMESPACE" --create-namespace \
+            --values "$(pwd)/values.yaml" \
+            --set "image.repository=${IMAGE_REPO}" \
+            ${IMAGE_TAG:+--set "image.tag=${IMAGE_TAG}"} \
+            --set "image.pullPolicy=${PULL_POLICY}" \
+            --set "node-observer.image.repository=${IMAGE_REPO}" \
+            ${IMAGE_TAG:+--set "node-observer.image.tag=${IMAGE_TAG}"} \
+            --set "node-observer.image.pullPolicy=${PULL_POLICY}" \
+            --wait --timeout 90s
+    catch:  # diagnostics only: dump pod state from the test namespace
+    - description: Pod status on install failure
+      script:
+        content: |
+          kubectl get pods -n "$NAMESPACE" -o wide
+          kubectl describe pods -n "$NAMESPACE"
+
+  - name: assert
+    description: >
+      The Node Observer fires "Added" events for the fake node on startup,
+      triggering generation automatically. Assert the leaf label on node-01
+      is ≤63 chars and starts with "x" (FNV64a hash of the 65-char switch name).
+    try:
+    - script:
+        timeout: 90s  # upper bound for the ~60s polling loop below plus kubectl overhead
+        content: |
+          set -euo pipefail
+
+          echo "Polling for leaf label on node-01..."
+          LEAF=""
+          for i in $(seq 1 30); do  # up to 30 attempts, 2s apart
+            LEAF=$(kubectl get node node-01 \
+              -o jsonpath='{.metadata.labels.network\.topology\.nvidia\.com/leaf}' 2>/dev/null || true)
+            [ -n "$LEAF" ] && break
+            sleep 2
+          done
+
+          if [ -z "$LEAF" ]; then
+            echo "FAIL: leaf label not set on node-01"
+            exit 1
+          fi
+
+          echo "leaf label value: '$LEAF' (length=${#LEAF})"
+
+          if [ "${#LEAF}" -gt 63 ]; then
+            echo "FAIL: label length ${#LEAF} exceeds 63"
+            exit 1
+          fi
+
+          ORIGINAL="AVERYLONGSWITCHNAMETHATEXCEEDSSIXTYCHARACTERSFORTESTINGPURPOSES01"
+          if [ "$LEAF" = "$ORIGINAL" ]; then
+            echo "FAIL: label was not hashed (still equals original name)"
+            exit 1
+          fi
+          # POSIX prefix match: ${LEAF:0:1} is a bash-only expansion that aborts under dash ("Bad substitution").
+          case "$LEAF" in
+            x*) ;;
+            *) echo "FAIL: expected hashed label to start with 'x', got: $LEAF"; exit 1 ;;
+          esac
+
+          echo "OK: label correctly hashed to '$LEAF'"
+    catch:  # diagnostics only: tail both deployments' logs
+    - description: Topograph and Node Observer logs on assertion failure
+      script:
+        content: |
+          echo "=== Topograph logs ==="
+          kubectl logs -n "$NAMESPACE" deploy/topograph --tail=50 || true
+          echo "=== Node Observer logs ==="
+          kubectl logs -n "$NAMESPACE" deploy/topograph-node-observer --tail=50 || true
+    finally:
+    - script:  # best-effort teardown: every command tolerates failure via `|| true`
+        timeout: 120s
+        content: |
+          helm uninstall topograph -n "$NAMESPACE" --wait --timeout 60s || true
+          kubectl delete clusterrole topograph topograph-node-observer \
+            --ignore-not-found 2>/dev/null || true
+          kubectl delete clusterrolebinding topograph topograph-node-observer \
+            --ignore-not-found 2>/dev/null || true
+          kubectl delete namespace "$NAMESPACE" --timeout=60s || true
+    - delete:  # the fake Node is cluster-scoped, so it is removed explicitly
+        ref:
+          apiVersion: v1
+          kind: Node
+          name: node-01
diff --git a/tests/chainsaw/k8s/label-truncation/values.yaml b/tests/chainsaw/k8s/label-truncation/values.yaml
new file mode 100644
index 00000000..eea13501
--- /dev/null
+++ b/tests/chainsaw/k8s/label-truncation/values.yaml
@@ -0,0 +1,29 @@
+global:
+  engine:
+    name: "k8s"
+  provider:
+    name: "test"
+    params:
+      modelFileName: "/etc/topograph/models/topology-model.yaml"
+
+config:
+  requestAggregationDelay: "1s"
+
+node-data-broker:
+  enabled: false
+
+node-observer:
+  topograph:
+    trigger:
+      nodeSelector:
+        kubernetes.io/os: "linux"
+
+# Test-only wiring: mount the topology-test-model ConfigMap (topology-model.yaml) into the Topograph pod.
+volumeMounts:
+  - name: "topology-test-model"
+    mountPath: "/etc/topograph/models"
+
+volumes:
+  - name: "topology-test-model"
+    configMap:
+      name: "topology-test-model"
diff --git a/tests/chainsaw/kind-config.yaml b/tests/chainsaw/kind-config.yaml
new file mode 100644
index 00000000..6f0e32df
--- /dev/null
+++ b/tests/chainsaw/kind-config.yaml
@@ -0,0 +1,8 @@
+apiVersion: kind.x-k8s.io/v1alpha4
+kind: Cluster
+nodes:  # one control-plane node followed by four workers
+  - role: control-plane
+  - role: worker
+  - role: worker
+  - role: worker
+  - role: worker
diff --git a/tests/chainsaw/slinky/block-complement/chainsaw-test.yaml b/tests/chainsaw/slinky/block-complement/chainsaw-test.yaml
new file mode 100644
index 00000000..028296fd
--- /dev/null
+++ b/tests/chainsaw/slinky/block-complement/chainsaw-test.yaml
@@ -0,0 +1,252 @@
+# Copyright 2026 NVIDIA CORPORATION
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: chainsaw.kyverno.io/v1alpha1
+kind: Test
+metadata:
+  name: slinky-block-complement
+spec:
+  description: >
+    Verify block-complementing with missing nodes in two of three NVLink cliques.
+    The topology model defines three cliques under a spine→{leaf-1,leaf-2,leaf-3}
+    switch tree: clique-1 has only node-01 (node-02 absent), clique-2 has both
+    node-03 and node-04, and clique-3 has only node-06 (node-05 absent). Four
+    fake K8s nodes and one fake slurmd pod per node are created. With explicit
+    BlockSizes=2,4,8, each clique occupies one base block of capacity 2 regardless
+    of how many nodes are present. Three cliques fill 3 of the 4 base-block slots
+    required at the top level (4×2=8 nodes to satisfy the 8-node lastBS boundary),
+    so complementBlocks pads the output with one empty placeholder (block004).
+    Nodes absent from their clique appear as unfilled host slots within their base
+    block — they are not emitted in the BlockName line. The Node Observer fires on
+    the fake nodes on startup and auto-triggers generation.
+  concurrent: false
+  steps:
+
+  - name: prepare
+    description: >
+      Create the topology-model ConfigMap with three NVLink cliques where node-02
+      (clique-1) and node-05 (clique-3) are absent, simulating unavailable nodes.
+      Create four fake K8s nodes (node-01, node-03, node-04, node-06) and one fake
+      slurmd pod per node. Each pod is placed on the corresponding fake node via
+      spec.nodeName and status-patched to Ready so the Slinky engine can build its
+      k8s-node→SLURM-hostname map. Fake nodes carry kubernetes.io/os=linux so the
+      Node Observer fires on them.
+    try:
+    - apply:  # model: one spine over three leaves; node-02 and node-05 are intentionally not listed
+        resource:
+          apiVersion: v1
+          kind: ConfigMap
+          metadata:
+            name: topology-test-model  # must equal the configMap name under `volumes` in values.yaml
+          data:
+            topology-model.yaml: |
+              switches:
+                spine:
+                  switches:
+                  - leaf-1
+                  - leaf-2
+                  - leaf-3
+                leaf-1:
+                  nodes:
+                  - node-01
+                leaf-2:
+                  nodes:
+                  - node-03
+                  - node-04
+                leaf-3:
+                  nodes:
+                  - node-06
+              nodes:
+                node-01:
+                  attributes:
+                    nvlink: clique-1
+                node-03:
+                  attributes:
+                    nvlink: clique-2
+                node-04:
+                  attributes:
+                    nvlink: clique-2
+                node-06:
+                  attributes:
+                    nvlink: clique-3
+    - apply:  # fake Node for clique-1
+        resource:
+          apiVersion: v1
+          kind: Node
+          metadata:
+            name: node-01
+            labels:
+              kubernetes.io/os: linux  # matches the node-observer trigger nodeSelector in values.yaml
+    - apply:  # fake Node for clique-2 (first member)
+        resource:
+          apiVersion: v1
+          kind: Node
+          metadata:
+            name: node-03
+            labels:
+              kubernetes.io/os: linux
+    - apply:  # fake Node for clique-2 (second member)
+        resource:
+          apiVersion: v1
+          kind: Node
+          metadata:
+            name: node-04
+            labels:
+              kubernetes.io/os: linux
+    - apply:  # fake Node for clique-3
+        resource:
+          apiVersion: v1
+          kind: Node
+          metadata:
+            name: node-06
+            labels:
+              kubernetes.io/os: linux
+    - script:  # creates the slurmd pods selected by podSelector app=slurmd, then marks them Ready
+        timeout: 30s
+        content: |
+          set -euo pipefail
+          # Create one fake slurmd pod per fake node, directly assigned via spec.nodeName.
+          # The Slinky engine requires ready slurmd pods to build its k8s-node to
+          # SLURM-hostname map. Without pods nodeMap is empty and no nodes are resolved.
+          for NODE in node-01 node-03 node-04 node-06; do
+            kubectl apply -n "$NAMESPACE" -f - <<EOF
+          apiVersion: v1
+          kind: Pod
+          metadata:
+            name: slurmd-${NODE}
+            namespace: ${NAMESPACE}
+            labels:
+              app: slurmd
+          spec:
+            nodeName: ${NODE}
+            hostname: ${NODE}
+            tolerations:
+            - operator: Exists
+            containers:
+            - name: slurmd
+              image: registry.k8s.io/pause:3.9
+          EOF
+          done
+          # Patch each pod's status to Ready so IsPodReady() returns true.
+          for NODE in node-01 node-03 node-04 node-06; do
+            kubectl patch pod "slurmd-${NODE}" -n "$NAMESPACE" \
+              --type=merge \
+              --subresource=status \
+              -p '{"status":{"conditions":[{"type":"Ready","status":"True","lastTransitionTime":"2024-01-01T00:00:00Z"}]}}'
+          done
+
+  - name: install-topograph
+    description: >
+      Install Topograph with the Node Observer using the test provider and Slinky
+      engine configured with topology/block and explicit BlockSizes=2,4,8. The
+      namespace is injected via --set so the Slinky engine writes the
+      slurm-topology ConfigMap into the correct test namespace (where the fake
+      slurmd pods already exist). The observer fires on the fake nodes on startup
+      and auto-triggers generation.
+    try:
+    - script:
+        timeout: 120s  # leaves headroom over helm's own `--wait --timeout 90s` below
+        content: |  # removes existing topograph ClusterRole/ClusterRoleBinding objects, then installs the chart from the repo checkout
+          set -euo pipefail
+          REPO_ROOT=$(git rev-parse --show-toplevel)
+          IMAGE_REPO="${TOPOGRAPH_IMAGE_REPO:-ghcr.io/nvidia/topograph}"
+          IMAGE_TAG="${TOPOGRAPH_IMAGE_TAG:-}"
+          PULL_POLICY="${TOPOGRAPH_IMAGE_PULL_POLICY:-IfNotPresent}"
+
+          kubectl delete clusterrole topograph topograph-node-observer \
+            --ignore-not-found 2>/dev/null || true
+          kubectl delete clusterrolebinding topograph topograph-node-observer \
+            --ignore-not-found 2>/dev/null || true
+
+          helm upgrade --install topograph "${REPO_ROOT}/charts/topograph" \
+            --namespace "$NAMESPACE" --create-namespace \
+            --values "$(pwd)/values.yaml" \
+            --set "image.repository=${IMAGE_REPO}" \
+            ${IMAGE_TAG:+--set "image.tag=${IMAGE_TAG}"} \
+            --set "image.pullPolicy=${PULL_POLICY}" \
+            --set "node-observer.image.repository=${IMAGE_REPO}" \
+            ${IMAGE_TAG:+--set "node-observer.image.tag=${IMAGE_TAG}"} \
+            --set "node-observer.image.pullPolicy=${PULL_POLICY}" \
+            --set "global.engine.params.namespace=${NAMESPACE}" \
+            --wait --timeout 90s
+    catch:  # diagnostics only: dump pod state from the test namespace
+    - description: Pod status on install failure
+      script:
+        content: |
+          kubectl get pods -n "$NAMESPACE" -o wide
+          kubectl describe pods -n "$NAMESPACE"
+
+  - name: assert
+    description: >
+      The Node Observer fires on the fake nodes on startup, triggering generation
+      automatically. The test provider returns a graph with three NVLink clique
+      domains: clique-1 (node-01 only), clique-2 (node-03 and node-04), and
+      clique-3 (node-06 only). The Slinky engine with BlockSizes=2,4,8 calls
+      complementBlocks: each clique occupies one base block of capacity 2
+      regardless of how many live nodes it has. Three cliques fill 3 of the 4
+      required top-level slots, so block004 is added as an empty placeholder.
+      Nodes absent from a clique (node-02 in clique-1, node-05 in clique-3) are
+      not emitted in their BlockName line — the base block simply has fewer live
+      hosts. Assert the ConfigMap contains all three real blocks with their
+      respective live nodes and the empty complement placeholder.
+    try:
+    - assert:
+        timeout: 60s  # generation is observer-triggered, so allow time for the ConfigMap to appear and match
+        resource:
+          apiVersion: v1
+          kind: ConfigMap
+          metadata:
+            name: slurm-topology  # topologyConfigmapName in values.yaml
+          data:
+            topology.conf: |
+              # block001=clique-1
+              BlockName=block001 Nodes=node-01
+              # block002=clique-2
+              BlockName=block002 Nodes=node-[03-04]
+              # block003=clique-3
+              BlockName=block003 Nodes=node-06
+              BlockName=block004
+              BlockSizes=2,4,8
+    catch:  # diagnostics only: logs plus whatever slurm-topology currently contains
+    - description: Topograph and Node Observer logs on failure
+      script:
+        content: |
+          echo "=== Topograph logs ==="
+          kubectl logs -n "$NAMESPACE" deploy/topograph --tail=80 || true
+          echo "=== Node Observer logs ==="
+          kubectl logs -n "$NAMESPACE" deploy/topograph-node-observer --tail=50 || true
+          echo "=== slurm-topology content ==="
+          kubectl get configmap slurm-topology -n "$NAMESPACE" -o yaml 2>/dev/null || true
+    finally:
+    - script:  # best-effort teardown: every command tolerates failure via `|| true`
+        timeout: 120s
+        content: |
+          helm uninstall topograph -n "$NAMESPACE" --wait --timeout 60s || true
+          kubectl delete clusterrole topograph topograph-node-observer \
+            --ignore-not-found 2>/dev/null || true
+          kubectl delete clusterrolebinding topograph topograph-node-observer \
+            --ignore-not-found 2>/dev/null || true
+          kubectl delete pod \
+            slurmd-node-01 slurmd-node-03 slurmd-node-04 slurmd-node-06 \
+            -n "$NAMESPACE" --ignore-not-found --grace-period=0 --force 2>/dev/null || true
+          kubectl delete namespace "$NAMESPACE" --timeout=60s || true
+    - delete:  # fake Nodes are cluster-scoped, so each one is removed explicitly
+        ref:
+          apiVersion: v1
+          kind: Node
+          name: node-01
+    - delete:
+        ref:
+          apiVersion: v1
+          kind: Node
+          name: node-03
+    - delete:
+        ref:
+          apiVersion: v1
+          kind: Node
+          name: node-04
+    - delete:
+        ref:
+          apiVersion: v1
+          kind: Node
+          name: node-06
diff --git a/tests/chainsaw/slinky/block-complement/values.yaml b/tests/chainsaw/slinky/block-complement/values.yaml
new file mode 100644
index 00000000..38aff418
--- /dev/null
+++ b/tests/chainsaw/slinky/block-complement/values.yaml
@@ -0,0 +1,41 @@
+global:
+  engine:
+    name: "slinky"
+    params:
+      # `namespace` is not set here; the install script supplies it with --set global.engine.params.namespace=$NAMESPACE
+      plugin: "topology/block"
+      blockSizes:
+        - 2
+        - 4
+        - 8
+      topologyConfigmapName: "slurm-topology"
+      topologyConfigPath: "topology.conf"
+      podSelector:
+        matchLabels:
+          app: "slurmd"
+  provider:
+    name: "test"
+    params:
+      modelFileName: "/etc/topograph/models/topology-model.yaml"
+
+config:
+  requestAggregationDelay: "1s"
+
+node-data-broker:
+  enabled: false
+
+node-observer:
+  topograph:
+    trigger:
+      nodeSelector:
+        kubernetes.io/os: "linux"
+
+# Test-only wiring: mount the topology-test-model ConfigMap (topology-model.yaml) into the Topograph pod.
+volumeMounts:
+  - name: "topology-test-model"
+    mountPath: "/etc/topograph/models"
+
+volumes:
+  - name: "topology-test-model"
+    configMap:
+      name: "topology-test-model"
diff --git a/tests/chainsaw/slinky/dra-provider/chainsaw-test.yaml b/tests/chainsaw/slinky/dra-provider/chainsaw-test.yaml
new file mode 100644
index 00000000..a02a9a31
--- /dev/null
+++ b/tests/chainsaw/slinky/dra-provider/chainsaw-test.yaml
@@ -0,0 +1,226 @@
+# Copyright 2026 NVIDIA CORPORATION
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: chainsaw.kyverno.io/v1alpha1
+kind: Test
+metadata:
+  name: slinky-dra-provider
+spec:
+  description: >
+    Verify that the DRA provider discovers NVLink clique topology from
+    nvidia.com/gpu.clique node labels and that the Slinky engine writes a
+    correct block topology.conf into a ConfigMap. Four fake nodes are arranged
+    in two cliques (node-01/node-02 in clique-1, node-03/node-04 in clique-2).
+    Each node carries the topograph.nvidia.com/instance and
+    topograph.nvidia.com/region annotations that the Slinky engine uses to build
+    the instance map. A fake slurmd pod (status-patched to Ready) is placed on
+    each fake node so the Slinky engine can resolve the k8s-node→SLURM-hostname
+    mapping it requires. The Node Observer fires on startup and auto-triggers
+    generation. The Slinky engine translates the DRA domain map into
+    topology/block format: one BlockName entry per NVLink clique.
+  concurrent: false
+  steps:
+
+  - name: prepare
+    description: >
+      Create four fake K8s Node objects arranged in two NVLink cliques, then
+      create one fake slurmd Pod per node (status-patched to Ready) so the
+      Slinky engine can build its k8s-node→SLURM-hostname mapping.
+      Each node carries:
+        - nvidia.com/gpu.clique: the NVLink clique ID (read by the DRA provider)
+        - topograph.nvidia.com/instance: the instance ID (= node name)
+        - topograph.nvidia.com/region: "local" (set by node-data-broker in production)
+        - kubernetes.io/os: linux (triggers the Node Observer)
+    try:
+    - apply:  # clique-1, first member
+        resource:
+          apiVersion: v1
+          kind: Node
+          metadata:
+            name: node-01
+            labels:
+              kubernetes.io/os: linux  # matches the node-observer trigger nodeSelector in values.yaml
+              nvidia.com/gpu.clique: clique-1
+            annotations:
+              topograph.nvidia.com/instance: node-01
+              topograph.nvidia.com/region: local
+    - apply:  # clique-1, second member
+        resource:
+          apiVersion: v1
+          kind: Node
+          metadata:
+            name: node-02
+            labels:
+              kubernetes.io/os: linux
+              nvidia.com/gpu.clique: clique-1
+            annotations:
+              topograph.nvidia.com/instance: node-02
+              topograph.nvidia.com/region: local
+    - apply:  # clique-2, first member
+        resource:
+          apiVersion: v1
+          kind: Node
+          metadata:
+            name: node-03
+            labels:
+              kubernetes.io/os: linux
+              nvidia.com/gpu.clique: clique-2
+            annotations:
+              topograph.nvidia.com/instance: node-03
+              topograph.nvidia.com/region: local
+    - apply:  # clique-2, second member
+        resource:
+          apiVersion: v1
+          kind: Node
+          metadata:
+            name: node-04
+            labels:
+              kubernetes.io/os: linux
+              nvidia.com/gpu.clique: clique-2
+            annotations:
+              topograph.nvidia.com/instance: node-04
+              topograph.nvidia.com/region: local
+    - script:  # creates the slurmd pods selected by podSelector app=slurmd, then marks them Ready
+        timeout: 30s
+        content: |
+          set -euo pipefail
+          # Create one fake slurmd pod per fake node, directly assigned via spec.nodeName.
+          # The Slinky engine requires ready slurmd pods to build its k8s-node to
+          # SLURM-hostname map. Without pods nodeMap is empty and no nodes are resolved.
+          for NODE in node-01 node-02 node-03 node-04; do
+            kubectl apply -n "$NAMESPACE" -f - <<EOF
+          apiVersion: v1
+          kind: Pod
+          metadata:
+            name: slurmd-${NODE}
+            namespace: ${NAMESPACE}
+            labels:
+              app: slurmd
+          spec:
+            nodeName: ${NODE}
+            hostname: ${NODE}
+            tolerations:
+            - operator: Exists
+            containers:
+            - name: slurmd
+              image: registry.k8s.io/pause:3.9
+          EOF
+          done
+          # Patch each pod's status to Ready so IsPodReady() returns true.
+          for NODE in node-01 node-02 node-03 node-04; do
+            kubectl patch pod "slurmd-${NODE}" -n "$NAMESPACE" \
+              --type=merge \
+              --subresource=status \
+              -p '{"status":{"conditions":[{"type":"Ready","status":"True","lastTransitionTime":"2024-01-01T00:00:00Z"}]}}'
+          done
+
+  - name: install-topograph
+    description: >
+      Install Topograph together with the Node Observer using the DRA provider
+      and Slinky engine. No model ConfigMap is needed — the DRA provider reads
+      nvidia.com/gpu.clique labels and topograph.nvidia.com/* annotations
+      directly from the K8s API. The namespace is injected via --set so the
+      Slinky engine writes the slurm-topology ConfigMap into the correct test
+      namespace (where the fake slurmd pods already exist). The observer fires
+      on startup and auto-triggers generation.
+    try:
+    - script:
+        timeout: 120s  # leaves headroom over helm's own `--wait --timeout 90s` below
+        content: |  # removes existing topograph ClusterRole/ClusterRoleBinding objects, then installs the chart from the repo checkout
+          set -euo pipefail
+          REPO_ROOT=$(git rev-parse --show-toplevel)
+          IMAGE_REPO="${TOPOGRAPH_IMAGE_REPO:-ghcr.io/nvidia/topograph}"
+          IMAGE_TAG="${TOPOGRAPH_IMAGE_TAG:-}"
+          PULL_POLICY="${TOPOGRAPH_IMAGE_PULL_POLICY:-IfNotPresent}"
+
+          kubectl delete clusterrole topograph topograph-node-observer \
+            --ignore-not-found 2>/dev/null || true
+          kubectl delete clusterrolebinding topograph topograph-node-observer \
+            --ignore-not-found 2>/dev/null || true
+
+          helm upgrade --install topograph "${REPO_ROOT}/charts/topograph" \
+            --namespace "$NAMESPACE" --create-namespace \
+            --values "$(pwd)/values.yaml" \
+            --set "image.repository=${IMAGE_REPO}" \
+            ${IMAGE_TAG:+--set "image.tag=${IMAGE_TAG}"} \
+            --set "image.pullPolicy=${PULL_POLICY}" \
+            --set "node-observer.image.repository=${IMAGE_REPO}" \
+            ${IMAGE_TAG:+--set "node-observer.image.tag=${IMAGE_TAG}"} \
+            --set "node-observer.image.pullPolicy=${PULL_POLICY}" \
+            --set "global.engine.params.namespace=${NAMESPACE}" \
+            --wait --timeout 90s
+    catch:  # diagnostics only: dump pod state from the test namespace
+    - description: Pod status on install failure
+      script:
+        content: |
+          kubectl get pods -n "$NAMESPACE" -o wide
+          kubectl describe pods -n "$NAMESPACE"
+
+  - name: assert
+    description: >
+      The Node Observer fires on the fake nodes on startup, triggering generation
+      automatically. The DRA provider groups nodes by nvidia.com/gpu.clique into
+      two domains. The Slinky engine uses the fake slurmd pods to resolve k8s
+      node names to SLURM hostnames, then translates the domain map into
+      topology/block format and writes the slurm-topology ConfigMap. Assert the
+      ConfigMap contains the correct BlockName entries for each NVLink clique and
+      the auto-calculated BlockSizes (2 cliques x 2 nodes each -> BlockSizes=2,4).
+    try:
+    - assert:
+        timeout: 60s  # generation is observer-triggered, so allow time for the ConfigMap to appear and match
+        resource:
+          apiVersion: v1
+          kind: ConfigMap
+          metadata:
+            name: slurm-topology  # topologyConfigmapName in values.yaml
+          data:
+            topology.conf: |
+              # block001=clique-1
+              BlockName=block001 Nodes=node-[01-02]
+              # block002=clique-2
+              BlockName=block002 Nodes=node-[03-04]
+              BlockSizes=2,4
+    catch:  # diagnostics only: logs, ConfigMap listing, and current slurm-topology content
+    - description: Topograph and Node Observer logs on failure
+      script:
+        content: |
+          echo "=== Topograph logs ==="
+          kubectl logs -n "$NAMESPACE" deploy/topograph --tail=80 || true
+          echo "=== Node Observer logs ==="
+          kubectl logs -n "$NAMESPACE" deploy/topograph-node-observer --tail=50 || true
+          echo "=== ConfigMaps ==="
+          kubectl get configmaps -n "$NAMESPACE"
+          echo "=== slurm-topology content ==="
+          kubectl get configmap slurm-topology -n "$NAMESPACE" -o yaml 2>/dev/null || true
+    finally:
+    - script:  # best-effort teardown: every command tolerates failure via `|| true`
+        timeout: 120s
+        content: |
+          helm uninstall topograph -n "$NAMESPACE" --wait --timeout 60s || true
+          kubectl delete clusterrole topograph topograph-node-observer \
+            --ignore-not-found 2>/dev/null || true
+          kubectl delete clusterrolebinding topograph topograph-node-observer \
+            --ignore-not-found 2>/dev/null || true
+          kubectl delete pod slurmd-node-01 slurmd-node-02 slurmd-node-03 slurmd-node-04 \
+            -n "$NAMESPACE" --ignore-not-found --grace-period=0 --force 2>/dev/null || true
+          kubectl delete namespace "$NAMESPACE" --timeout=60s || true
+    - delete:  # fake Nodes are cluster-scoped, so each one is removed explicitly
+        ref:
+          apiVersion: v1
+          kind: Node
+          name: node-01
+    - delete:
+        ref:
+          apiVersion: v1
+          kind: Node
+          name: node-02
+    - delete:
+        ref:
+          apiVersion: v1
+          kind: Node
+          name: node-03
+    - delete:
+        ref:
+          apiVersion: v1
+          kind: Node
+          name: node-04
diff --git a/tests/chainsaw/slinky/dra-provider/values.yaml b/tests/chainsaw/slinky/dra-provider/values.yaml
new file mode 100644
index 00000000..1cf1de5e
--- /dev/null
+++ b/tests/chainsaw/slinky/dra-provider/values.yaml
@@ -0,0 +1,25 @@
+global:
+  engine:
+    name: "slinky"
+    params:
+      # `namespace` is not set here; the install script supplies it with --set global.engine.params.namespace=$NAMESPACE
+      plugin: "topology/block"
+      topologyConfigmapName: "slurm-topology"
+      topologyConfigPath: "topology.conf"
+      podSelector:
+        matchLabels:
+          app: "slurmd"
+  provider:
+    name: "dra"
+
+node-data-broker:
+  enabled: false
+
+node-observer:
+  topograph:
+    trigger:
+      nodeSelector:
+        kubernetes.io/os: "linux"
+
+config:
+  requestAggregationDelay: "1s"
diff --git a/tests/chainsaw/slinky/dynamic-nodes/chainsaw-test.yaml b/tests/chainsaw/slinky/dynamic-nodes/chainsaw-test.yaml
new file mode 100644
index 00000000..830f5d8f
--- /dev/null
+++ b/tests/chainsaw/slinky/dynamic-nodes/chainsaw-test.yaml
@@ -0,0 +1,288 @@
+# Copyright 2026 NVIDIA CORPORATION
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: chainsaw.kyverno.io/v1alpha1
+kind: Test
+metadata:
+  name: slinky-dynamic-nodes
+spec:
+  description: >
+    Verify the skeleton-only ConfigMap and per-node topology annotations produced
+    by the Slinky engine when useDynamicNodes=true and configUpdateMode=skeleton-only.
+    The topology model defines three NVLink cliques under a spine→{leaf-1,leaf-2,leaf-3}
+    switch tree: clique-1 has only node-01 (node-02 absent), clique-2 has both
+    node-03 and node-04, and clique-3 has only node-06 (node-05 absent). Four fake
+    K8s nodes and one fake slurmd pod per node are created. With BlockSizes=2,4,8
+    and three cliques, complementBlocks adds block004 as an empty placeholder. With
+    skeleton-only mode ALL BlockName lines are written WITHOUT Nodes= so that Slinky
+    can reconstruct membership from per-node annotations. performReconciliation then
+    annotates each K8s node with topology.slinky.slurm.net/spec pointing to its
+    assigned block.
+  concurrent: false
+  steps:
+
+  - name: prepare
+    description: >
+      Create the topology-model ConfigMap with three NVLink cliques where node-02
+      (clique-1) and node-05 (clique-3) are absent, simulating unavailable nodes.
+      Create four fake K8s nodes (node-01, node-03, node-04, node-06) and one fake
+      slurmd pod per node. Each pod is placed on the corresponding fake node via
+      spec.nodeName and status-patched to Ready so the Slinky engine can build its
+      k8s-node→SLURM-hostname map for performReconciliation. Fake nodes carry
+      kubernetes.io/os=linux so the Node Observer fires on them.
+    try:
+    - apply:  # topology model; values.yaml mounts this ConfigMap and points modelFileName at the key below
+        resource:
+          apiVersion: v1
+          kind: ConfigMap
+          metadata:
+            name: topology-test-model  # must match volumes[].configMap.name in values.yaml
+          data:
+            topology-model.yaml: |
+              switches:
+                spine:
+                  switches:
+                  - leaf-1
+                  - leaf-2
+                  - leaf-3
+                leaf-1:
+                  nodes:
+                  - node-01
+                leaf-2:
+                  nodes:
+                  - node-03
+                  - node-04
+                leaf-3:
+                  nodes:
+                  - node-06
+              nodes:
+                node-01:
+                  attributes:
+                    nvlink: clique-1
+                node-03:
+                  attributes:
+                    nvlink: clique-2
+                node-04:
+                  attributes:
+                    nvlink: clique-2
+                node-06:
+                  attributes:
+                    nvlink: clique-3
+    - apply:  # fake Node objects follow, one apply operation per node
+        resource:
+          apiVersion: v1
+          kind: Node
+          metadata:
+            name: node-01
+            labels:
+              kubernetes.io/os: linux  # matches node-observer.topograph.trigger.nodeSelector in values.yaml
+    - apply:
+        resource:
+          apiVersion: v1
+          kind: Node
+          metadata:
+            name: node-03
+            labels:
+              kubernetes.io/os: linux
+    - apply:
+        resource:
+          apiVersion: v1
+          kind: Node
+          metadata:
+            name: node-04
+            labels:
+              kubernetes.io/os: linux
+    - apply:
+        resource:
+          apiVersion: v1
+          kind: Node
+          metadata:
+            name: node-06
+            labels:
+              kubernetes.io/os: linux
+    - script:
+        timeout: 30s
+        content: |
+          set -euo pipefail
+          # For each fake node, create one slurmd pod bound to it through spec.nodeName and
+          # then patch the pod status to Ready so IsPodReady() returns true.
+          # performReconciliation uses these pods to build the k8s-node→SLURM-hostname map
+          # before annotating each node with topology.slinky.slurm.net/spec.
+          READY_PATCH='{"status":{"conditions":[{"type":"Ready","status":"True","lastTransitionTime":"2024-01-01T00:00:00Z"}]}}'
+          for FAKE_NODE in node-01 node-03 node-04 node-06; do
+            kubectl apply -n "$NAMESPACE" -f - <<EOF
+          apiVersion: v1
+          kind: Pod
+          metadata:
+            name: slurmd-${FAKE_NODE}
+            namespace: ${NAMESPACE}
+            labels:
+              app: slurmd
+          spec:
+            nodeName: ${FAKE_NODE}
+            hostname: ${FAKE_NODE}
+            tolerations:
+            - operator: Exists
+            containers:
+            - name: slurmd
+              image: registry.k8s.io/pause:3.9
+          EOF
+            # The status subresource is patched by hand so the pod reports Ready.
+            kubectl patch pod "slurmd-${FAKE_NODE}" -n "$NAMESPACE" \
+              --type=merge \
+              --subresource=status \
+              -p "$READY_PATCH"
+          done
+
+  - name: install-topograph
+    description: >
+      Install Topograph with the Node Observer using the test provider and Slinky
+      engine configured with topology/block, BlockSizes=2,4,8, useDynamicNodes=true,
+      and configUpdateMode=skeleton-only. The namespace is injected via --set so the
+      Slinky engine writes the slurm-topology ConfigMap and node annotations into the
+      correct test namespace. The observer fires on the fake nodes on startup and
+      auto-triggers generation.
+    try:
+    - script:
+        timeout: 120s  # leaves room above helm's own --timeout 90s
+        content: |
+          set -euo pipefail
+          CHART_DIR="$(git rev-parse --show-toplevel)/charts/topograph"
+          REPO="${TOPOGRAPH_IMAGE_REPO:-ghcr.io/nvidia/topograph}"
+          TAG="${TOPOGRAPH_IMAGE_TAG:-}"
+          POLICY="${TOPOGRAPH_IMAGE_PULL_POLICY:-IfNotPresent}"
+
+          # Best-effort removal of cluster-scoped RBAC objects before (re)installing.
+          for KIND in clusterrole clusterrolebinding; do
+            kubectl delete "$KIND" topograph topograph-node-observer \
+              --ignore-not-found 2>/dev/null || true
+          done
+
+          # Image settings for both workloads; tag overrides are added only when TAG is non-empty.
+          set -- --set "image.repository=${REPO}" --set "image.pullPolicy=${POLICY}" \
+            --set "node-observer.image.repository=${REPO}" \
+            --set "node-observer.image.pullPolicy=${POLICY}"
+          [ -z "$TAG" ] || set -- "$@" --set "image.tag=${TAG}" --set "node-observer.image.tag=${TAG}"
+          helm upgrade --install topograph "$CHART_DIR" \
+            --namespace "$NAMESPACE" --create-namespace \
+            --values "$(pwd)/values.yaml" "$@" \
+            --set "global.engine.params.namespace=${NAMESPACE}" \
+            --wait --timeout 90s
+    catch:
+    - description: Pod status on install failure
+      script:
+        content: |
+          kubectl get pods -n "$NAMESPACE" -o wide
+          kubectl describe pods -n "$NAMESPACE"
+
+  - name: assert
+    description: >
+      The Node Observer fires on the fake nodes on startup, triggering generation.
+      With configUpdateMode=skeleton-only the slurm-topology ConfigMap is written
+      with all BlockName lines omitting Nodes= — including the complement placeholder
+      block004. With useDynamicNodes=true performReconciliation then annotates each
+      K8s node with topology.slinky.slurm.net/spec pointing to its assigned block:
+      node-01→block001, node-03→block002, node-04→block002, node-06→block003.
+    try:
+    - assert:
+        timeout: 60s  # generation is triggered by the observer, so the ConfigMap appears asynchronously
+        resource:
+          apiVersion: v1
+          kind: ConfigMap
+          metadata:
+            name: slurm-topology  # topologyConfigmapName in values.yaml
+          data:
+            topology.conf: |
+              # block001=clique-1
+              BlockName=block001
+              # block002=clique-2
+              BlockName=block002
+              # block003=clique-3
+              BlockName=block003
+              BlockName=block004
+              BlockSizes=2,4,8
+    - assert:
+        timeout: 30s
+        resource:
+          apiVersion: v1
+          kind: Node
+          metadata:
+            name: node-01
+            annotations:
+              topology.slinky.slurm.net/spec: "default:block001"  # node-01 belongs to clique-1
+    - assert:
+        timeout: 30s
+        resource:
+          apiVersion: v1
+          kind: Node
+          metadata:
+            name: node-03
+            annotations:
+              topology.slinky.slurm.net/spec: "default:block002"  # node-03 belongs to clique-2
+    - assert:
+        timeout: 30s
+        resource:
+          apiVersion: v1
+          kind: Node
+          metadata:
+            name: node-04
+            annotations:
+              topology.slinky.slurm.net/spec: "default:block002"  # node-04 belongs to clique-2
+    - assert:
+        timeout: 30s
+        resource:
+          apiVersion: v1
+          kind: Node
+          metadata:
+            name: node-06
+            annotations:
+              topology.slinky.slurm.net/spec: "default:block003"  # node-06 belongs to clique-3
+    catch:  # diagnostics only; every command is best-effort (|| true)
+    - description: Topograph and Node Observer logs on failure
+      script:
+        content: |
+          echo "=== Topograph logs ==="
+          kubectl logs -n "$NAMESPACE" deploy/topograph --tail=80 || true
+          echo "=== Node Observer logs ==="
+          kubectl logs -n "$NAMESPACE" deploy/topograph-node-observer --tail=50 || true
+          echo "=== slurm-topology content ==="
+          kubectl get configmap slurm-topology -n "$NAMESPACE" -o yaml 2>/dev/null || true
+          echo "=== node annotations ==="
+          for NODE in node-01 node-03 node-04 node-06; do
+            echo "--- ${NODE} ---"
+            kubectl get node "${NODE}" -o jsonpath='{.metadata.annotations}' 2>/dev/null || true
+            echo
+          done
+    finally:  # best-effort teardown; every command in the script tolerates failure
+    - script:
+        timeout: 180s  # headroom above helm uninstall (60s) + namespace delete (60s) + the other kubectl calls
+        content: |
+          helm uninstall topograph -n "$NAMESPACE" --wait --timeout 60s || true
+          kubectl delete clusterrole topograph topograph-node-observer \
+            --ignore-not-found 2>/dev/null || true
+          kubectl delete clusterrolebinding topograph topograph-node-observer \
+            --ignore-not-found 2>/dev/null || true
+          kubectl delete pod \
+            slurmd-node-01 slurmd-node-03 slurmd-node-04 slurmd-node-06 \
+            -n "$NAMESPACE" --ignore-not-found --grace-period=0 --force 2>/dev/null || true
+          kubectl delete namespace "$NAMESPACE" --timeout=60s || true
+    - delete:  # Nodes are cluster-scoped, so each fake node is removed explicitly
+        ref:
+          apiVersion: v1
+          kind: Node
+          name: node-01
+    - delete:
+        ref:
+          apiVersion: v1
+          kind: Node
+          name: node-03
+    - delete:
+        ref:
+          apiVersion: v1
+          kind: Node
+          name: node-04
+    - delete:
+        ref:
+          apiVersion: v1
+          kind: Node
+          name: node-06
diff --git a/tests/chainsaw/slinky/dynamic-nodes/values.yaml b/tests/chainsaw/slinky/dynamic-nodes/values.yaml
new file mode 100644
index 00000000..206e9b93
--- /dev/null
+++ b/tests/chainsaw/slinky/dynamic-nodes/values.yaml
@@ -0,0 +1,43 @@
+global:
+  provider:
+    name: test
+    params:
+      # Path = volumeMounts[].mountPath + the ConfigMap key created by the test's prepare step.
+      modelFileName: /etc/topograph/models/topology-model.yaml
+  engine:
+    name: slinky
+    params:
+      # namespace is injected at install time via --set global.engine.params.namespace=$NAMESPACE
+      plugin: topology/block
+      # Shows up as BlockSizes=2,4,8 in the topology.conf asserted by the test.
+      blockSizes: [2, 4, 8]
+      # The two settings below select the mode under test: skeleton-only output with dynamic nodes.
+      useDynamicNodes: true
+      configUpdateMode: skeleton-only
+      topologyConfigmapName: slurm-topology
+      topologyConfigPath: topology.conf
+      podSelector:
+        matchLabels:
+          app: slurmd  # label carried by the fake slurmd pods
+
+config:
+  requestAggregationDelay: 1s
+
+node-observer:
+  topograph:
+    trigger:
+      nodeSelector:
+        kubernetes.io/os: linux
+
+node-data-broker:
+  enabled: false
+
+# Test-only wiring: mount the topology-test-model ConfigMap (topology-model.yaml) into the Topograph pod.
+volumes:
+  - name: topology-test-model
+    configMap:
+      name: topology-test-model
+
+volumeMounts:
+  - name: topology-test-model
+    mountPath: /etc/topograph/models
diff --git a/tests/chainsaw/slinky/tree-topology/chainsaw-test.yaml b/tests/chainsaw/slinky/tree-topology/chainsaw-test.yaml
new file mode 100644
index 00000000..32303935
--- /dev/null
+++ b/tests/chainsaw/slinky/tree-topology/chainsaw-test.yaml
@@ -0,0 +1,123 @@
+# Copyright 2026 NVIDIA CORPORATION
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: chainsaw.kyverno.io/v1alpha1
+kind: Test
+metadata:
+  name: slinky-tree-topology
+spec:
+  description: >
+    Verify that the Slinky engine writes a correct topology.conf fragment into
+    a ConfigMap. The test uses a static topology model (S1→S2/S3, node-01 under
+    S2 and node-02 under S3) embedded inline in a ConfigMap and mounted into the
+    pod. The Node Observer fires on cluster nodes (which already carry the
+    kubernetes.io/os=linux label) and auto-triggers generation — no manual POST
+    is required. The Slinky engine derives topology.conf entirely from the graph
+    switch structure; no actual slurmd pods are needed.
+  concurrent: false
+  steps:
+
+  - name: prepare-model-configmap
+    description: Create the topology-model ConfigMap with inline content.
+    try:
+    - apply:  # values.yaml mounts this ConfigMap and points modelFileName at the key below
+        resource:
+          apiVersion: v1
+          kind: ConfigMap
+          metadata:
+            name: topology-test-model  # must match volumes[].configMap.name in values.yaml
+          data:
+            topology-model.yaml: |
+              switches:
+                S1:
+                  switches:
+                  - S2
+                  - S3
+                S2:
+                  nodes:
+                  - node-01
+                S3:
+                  nodes:
+                  - node-02
+              nodes:
+                node-01: {}
+                node-02: {}
+
+  - name: install-topograph
+    description: >
+      Install Topograph together with the Node Observer. The namespace is
+      injected via --set so the Slinky engine writes the slurm-topology
+      ConfigMap into the correct test namespace. The observer fires on cluster
+      nodes on startup, auto-triggering generation.
+    try:
+    - script:
+        timeout: 120s  # leaves room above helm's own --timeout 90s
+        content: |
+          set -euo pipefail
+          CHART_DIR="$(git rev-parse --show-toplevel)/charts/topograph"
+          REPO="${TOPOGRAPH_IMAGE_REPO:-ghcr.io/nvidia/topograph}"
+          TAG="${TOPOGRAPH_IMAGE_TAG:-}"
+          POLICY="${TOPOGRAPH_IMAGE_PULL_POLICY:-IfNotPresent}"
+
+          # Best-effort removal of cluster-scoped RBAC objects before (re)installing.
+          for KIND in clusterrole clusterrolebinding; do
+            kubectl delete "$KIND" topograph topograph-node-observer \
+              --ignore-not-found 2>/dev/null || true
+          done
+
+          # Image settings for both workloads; tag overrides are added only when TAG is non-empty.
+          set -- --set "image.repository=${REPO}" --set "image.pullPolicy=${POLICY}" \
+            --set "node-observer.image.repository=${REPO}" \
+            --set "node-observer.image.pullPolicy=${POLICY}"
+          [ -z "$TAG" ] || set -- "$@" --set "image.tag=${TAG}" --set "node-observer.image.tag=${TAG}"
+          helm upgrade --install topograph "$CHART_DIR" \
+            --namespace "$NAMESPACE" --create-namespace \
+            --values "$(pwd)/values.yaml" "$@" \
+            --set "global.engine.params.namespace=${NAMESPACE}" \
+            --wait --timeout 90s
+    catch:
+    - description: Pod status on install failure
+      script:
+        content: |
+          kubectl get pods -n "$NAMESPACE" -o wide
+          kubectl describe pods -n "$NAMESPACE"
+
+  - name: assert
+    description: >
+      The Node Observer fires on cluster nodes on startup, triggering generation
+      automatically. The Slinky engine derives topology.conf from the graph
+      switch structure (S1→S2/S3) and writes it into the slurm-topology
+      ConfigMap. Assert the ConfigMap contains the correct topology.conf entries.
+    try:
+    - assert:
+        timeout: 60s  # generation is triggered by the observer, so the ConfigMap appears asynchronously
+        resource:
+          apiVersion: v1
+          kind: ConfigMap
+          metadata:
+            name: slurm-topology  # topologyConfigmapName in values.yaml
+          data:
+            topology.conf: |
+              SwitchName=S1 Switches=S[2-3]
+              SwitchName=S2 Nodes=node-01
+              SwitchName=S3 Nodes=node-02
+    catch:  # diagnostics only; every command is best-effort (|| true)
+    - description: Topograph and Node Observer logs on failure
+      script:
+        content: |
+          echo "=== Topograph logs ==="
+          kubectl logs -n "$NAMESPACE" deploy/topograph --tail=50 || true
+          echo "=== Node Observer logs ==="
+          kubectl logs -n "$NAMESPACE" deploy/topograph-node-observer --tail=50 || true
+          echo "=== ConfigMaps ==="
+          kubectl get configmaps -n "$NAMESPACE" || true
+    finally:  # best-effort teardown; every command in the script tolerates failure
+    - script:
+        timeout: 180s  # headroom above helm uninstall (60s) + namespace delete (60s) + the other kubectl calls
+        content: |
+          helm uninstall topograph -n "$NAMESPACE" --wait --timeout 60s || true
+          kubectl delete clusterrole topograph topograph-node-observer \
+            --ignore-not-found 2>/dev/null || true
+          kubectl delete clusterrolebinding topograph topograph-node-observer \
+            --ignore-not-found 2>/dev/null || true
+          kubectl delete namespace "$NAMESPACE" --timeout=60s || true
diff --git a/tests/chainsaw/slinky/tree-topology/values.yaml b/tests/chainsaw/slinky/tree-topology/values.yaml
new file mode 100644
index 00000000..4342f815
--- /dev/null
+++ b/tests/chainsaw/slinky/tree-topology/values.yaml
@@ -0,0 +1,37 @@
+global:
+  provider:
+    name: test
+    params:
+      modelFileName: /etc/topograph/models/topology-model.yaml  # mountPath below + ConfigMap key from the test
+  engine:
+    name: slinky
+    params:
+      # namespace is injected at install time via --set global.engine.params.namespace=$NAMESPACE
+      plugin: topology/tree
+      topologyConfigmapName: slurm-topology
+      topologyConfigPath: topology.conf
+      podSelector:
+        matchLabels:
+          app: slurmd
+
+config:
+  requestAggregationDelay: 1s
+
+node-observer:
+  topograph:
+    trigger:
+      nodeSelector:
+        kubernetes.io/os: linux
+
+node-data-broker:
+  enabled: false
+
+# Test-only wiring: mount the topology-test-model ConfigMap (topology-model.yaml) into the Topograph pod.
+volumes:
+  - name: topology-test-model
+    configMap:
+      name: topology-test-model
+
+volumeMounts:
+  - name: topology-test-model
+    mountPath: /etc/topograph/models