From fa0930adeb9954bebd6679d004e9082eb585da97 Mon Sep 17 00:00:00 2001 From: Evan Vetere Date: Wed, 24 Jun 2026 14:07:49 -0400 Subject: [PATCH 1/2] fix: clear error for hostname-less tenant listeners Tenant Gateways whose listeners use the managed ports (80/443) without a hostname were rejected at admission with the opaque upstream CEL message "Combination of port, protocol and hostname must be unique for each listener", even for a single listener. Root cause: the mutating defaulter injects default-http/default-https listeners with no hostname at creation. A tenant listener on the same port/protocol without a hostname produces a duplicate (port, protocol, hostname) tuple, which the upstream Gateway API CRD CEL rejects. CRD CEL validation runs after mutating webhooks but before validating webhooks, so the equivalent check in validateListeners is preempted and never surfaces at creation. Reject hostname-less tenant (non-default) listeners in the defaulting webhook, before injecting the defaults, so the operator returns an actionable message instead of leaking the CEL error. Closes #219 --- internal/validation/gateway_validation.go | 31 ++++++++++++ .../validation/gateway_validation_test.go | 49 +++++++++++++++++++ internal/webhook/v1/gateway_webhook.go | 9 ++++ 3 files changed, 89 insertions(+) diff --git a/internal/validation/gateway_validation.go b/internal/validation/gateway_validation.go index a59bcdbb..dbefeda7 100644 --- a/internal/validation/gateway_validation.go +++ b/internal/validation/gateway_validation.go @@ -41,6 +41,37 @@ func ValidateGateway(gateway *gatewayv1.Gateway, opts GatewayValidationOptions) return allErrs } +// ValidateTenantListenerHostnames enforces that every tenant-provided +// (non-default) listener carries a hostname. It is meant to run in the mutating +// defaulting webhook, before SetDefaultListeners injects the hostname-less +// default-http/default-https listeners. +// +// here be dragons +// +// Without this check, a tenant listener on a managed port (80/443) with no +// hostname collides with an injected default on the same port and protocol — +// both have an empty hostname — and the upstream Gateway API CRD CEL rejects the +// whole object with the opaque "Combination of port, protocol and hostname must +// be unique for each listener". CRD CEL validation runs after mutating webhooks +// but before validating webhooks, so the equivalent check in validateListeners +// is preempted and never surfaces for this case at creation time. +func ValidateTenantListenerHostnames(gateway *gatewayv1.Gateway) field.ErrorList { + allErrs := field.ErrorList{} + fldPath := field.NewPath("spec", "listeners") + + for i, l := range gateway.Spec.Listeners { + if gatewayutil.IsDefaultListener(l) { + continue + } + if l.Hostname == nil { + allErrs = append(allErrs, field.Required(fldPath.Index(i).Child("hostname"), + "a hostname must be set; this controller injects default HTTP/HTTPS listeners, so listeners you define must specify a distinguishing hostname")) + } + } + + return allErrs +} + func validateListeners(gateway *gatewayv1.Gateway, fldPath *field.Path, opts GatewayValidationOptions) field.ErrorList { allErrs := field.ErrorList{} diff --git a/internal/validation/gateway_validation_test.go b/internal/validation/gateway_validation_test.go index 5919ed94..743129be 100644 --- a/internal/validation/gateway_validation_test.go +++ b/internal/validation/gateway_validation_test.go @@ -433,3 +433,52 @@ func TestValidateListenersAllowsExistingHostnameInStatus(t *testing.T) { assert.Len(t, errs, 0, "expected validateListeners to permit a hostname on a default listener that matches an existing status address") } + +func TestValidateTenantListenerHostnames(t *testing.T) { + hostnamePath := field.NewPath("spec", "listeners").Index(0).Child("hostname") + hostnameRequired := field.Required(hostnamePath, + "a hostname must be set; this controller injects default HTTP/HTTPS listeners, so listeners you define must specify a distinguishing hostname") + + scenarios := map[string]struct { + listeners []gatewayv1.Listener + expectedErrors field.ErrorList + }{ + "bare gateway, no listeners": { + listeners: nil, + expectedErrors: field.ErrorList{}, + }, + "tenant listener without hostname is rejected": { + listeners: []gatewayv1.Listener{ + {Name: "http", Protocol: gatewayv1.HTTPProtocolType, Port: 80}, + }, + expectedErrors: field.ErrorList{hostnameRequired}, + }, + "tenant listener with hostname is permitted": { + listeners: []gatewayv1.Listener{ + {Name: "http", Protocol: gatewayv1.HTTPProtocolType, Port: 80, Hostname: ptr.To(gatewayv1.Hostname("custom.example.com"))}, + }, + expectedErrors: field.ErrorList{}, + }, + "default-named listeners are exempt": { + listeners: []gatewayv1.Listener{ + {Name: gatewayutil.DefaultHTTPListenerName, Protocol: gatewayv1.HTTPProtocolType, Port: 80}, + {Name: gatewayutil.DefaultHTTPSListenerName, Protocol: gatewayv1.HTTPSProtocolType, Port: 443}, + }, + expectedErrors: field.ErrorList{}, + }, + } + + for name, scenario := range scenarios { + t.Run(name, func(t *testing.T) { + gateway := &gatewayv1.Gateway{ + Spec: gatewayv1.GatewaySpec{Listeners: scenario.listeners}, + } + + errs := ValidateTenantListenerHostnames(gateway) + + if diff := cmp.Diff(scenario.expectedErrors, errs, cmpopts.IgnoreFields(field.Error{}, "BadValue")); diff != "" { + t.Errorf("unexpected errors (-want +got):\n%s", diff) + } + }) + } +} diff --git a/internal/webhook/v1/gateway_webhook.go b/internal/webhook/v1/gateway_webhook.go index 0a34acc8..79a2a8e5 100644 --- a/internal/webhook/v1/gateway_webhook.go +++ b/internal/webhook/v1/gateway_webhook.go @@ -157,6 +157,15 @@ func (d *GatewayCustomDefaulter) Default(ctx context.Context, gateway *gatewayv1 // of creation. if gateway.CreationTimestamp.IsZero() { + // Reject hostname-less tenant listeners here, before injecting the + // default listeners. The injected defaults share ports 80/443 with no + // hostname, so a colliding tenant listener would otherwise be rejected + // by the upstream CRD CEL (which runs before the validating webhook) + // with an opaque "must be unique" message instead of this one. + if errs := validation.ValidateTenantListenerHostnames(gateway); len(errs) > 0 { + return apierrors.NewInvalid(gateway.GetObjectKind().GroupVersionKind().GroupKind(), gateway.GetName(), errs) + } + gatewayutil.SetDefaultListeners(gateway, d.config.Gateway) } From 37195cead00cc31c1ecf2945c4b8e344ecf9e6e9 Mon Sep 17 00:00:00 2001 From: Evan Vetere Date: Fri, 26 Jun 2026 14:36:52 -0400 Subject: [PATCH 2/2] test(e2e): assert fixed hostname-required message in collision test The gateway-listener-hostname-collision e2e test (merged via #221) was written to reproduce #219: it asserted that hostname-less tenant listeners are rejected with the opaque upstream CEL message "Combination of port, protocol and hostname must be unique for each listener". This branch fixes #219 by rejecting those listeners in NSO's defaulting webhook (before the CRD CEL stage) with a clear, actionable message. That flips the expected error, so probes A/B failed against this branch. Update probes A/B to grep for "a hostname must be set" and reframe the header/descriptions from "reproduce the bug" to "verify the fix". Probe D (listener with a hostname is admitted) is unchanged. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../chainsaw-test.yaml | 70 ++++++++++--------- 1 file changed, 38 insertions(+), 32 deletions(-) diff --git a/test/e2e/gateway-listener-hostname-collision/chainsaw-test.yaml b/test/e2e/gateway-listener-hostname-collision/chainsaw-test.yaml index 2ef818e3..cb0115c5 100644 --- a/test/e2e/gateway-listener-hostname-collision/chainsaw-test.yaml +++ b/test/e2e/gateway-listener-hostname-collision/chainsaw-test.yaml @@ -1,39 +1,44 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/kyverno/chainsaw/main/.schemas/json/test-chainsaw-v1alpha1.json # # Issue #219 — a tenant Gateway whose listeners use the default ports/protocols -# (80/HTTP, 443/HTTPS) WITHOUT a hostname is rejected at admission with the -# misleading upstream message: +# (80/HTTP, 443/HTTPS) WITHOUT a hostname used to be rejected at admission with +# the misleading upstream CEL message: # # Combination of port, protocol and hostname must be unique for each listener # -# The spec is valid per upstream Gateway API (listener hostname is optional). -# The rejection comes from NSO's default-listener injection: the mutating -# webhook unconditionally appends hostname-less default-http (80/HTTP) and -# default-https (443/HTTPS) listeners. When a tenant defines their own 80/HTTP -# or 443/HTTPS listener and omits the hostname, defaulting produces duplicate -# (port, protocol, hostname=empty) tuples, and the upstream CRD CEL rejects the -# whole object before NSO's own validating webhook can surface a clear reason. +# Root cause: NSO's default-listener injection. The mutating webhook +# unconditionally appends hostname-less default-http (80/HTTP) and default-https +# (443/HTTPS) listeners. When a tenant defines their own 80/HTTP or 443/HTTPS +# listener and omits the hostname, defaulting produced duplicate +# (port, protocol, hostname=empty) tuples, and the upstream CRD CEL rejected the +# whole object before NSO's own validating webhook could surface a clear reason. # -# This is a pure admission-time reproduction: the rejection happens on the +# The fix (#220) rejects hostname-less tenant (non-default) listeners in NSO's +# defaulting webhook, which runs BEFORE the CRD CEL stage. The object is still +# rejected — but now with an actionable message: +# +# spec.listeners[N].hostname: Required value: a hostname must be set; this +# controller injects default HTTP/HTTPS listeners, so listeners you define +# must specify a distinguishing hostname +# +# This test now asserts the FIXED behavior: probes A/B are still rejected, but +# with the clear hostname-required message instead of the opaque "must be +# unique" CEL error. Probe D (listener WITH a hostname) is still admitted. +# +# This is a pure admission-time check: the rejection happens on the # control-plane (nso-standard) cluster, so no downstream/edge wiring is needed. # We drive it exactly the way the issue did — kubectl apply --dry-run=server — # which runs the mutating webhook + CRD CEL without persisting anything. # # The probes mirror the issue's investigation matrix: -# B (decisive) — 1 listener, 80/HTTP, no hostname -> rejected -# A — 2 listeners, 80/HTTP + 443/HTTPS, no hostname -> rejected +# B (decisive) — 1 listener, 80/HTTP, no hostname -> rejected (clear msg) +# A — 2 listeners, 80/HTTP + 443/HTTPS, no hostname -> rejected (clear msg) # D (control) — 1 listener, 80/HTTP, WITH a hostname -> accepted # # Probe B is decisive: a single tenant listener cannot collide with itself, yet -# it is rejected — proving the failure is the injected-default collision, not -# tenant-side duplication. Probe D is the positive control: adding a hostname -# makes the tuple distinct from the injected defaults and the object is admitted. -# -# Run against unfixed code (main / this branch), all three probes pass: the bug -# reproduces. Once datum-cloud/network-services-operator#220 merges, probes A/B -# are rejected by NSO's defaulting webhook FIRST, with a clear hostname-required -# message instead of "must be unique" — so this test's expected message will -# change with the fix (see the grep in each probe). +# it is rejected — the rejection comes from the hostname-required rule guarding +# the injected defaults, not tenant-side duplication. Probe D is the positive +# control: adding a hostname makes the tuple distinct and the object is admitted. # # concurrent: false — creating the shared managed GatewayClass coexists with the # other gateway e2e tests via apply (not create), but keep it serialized to @@ -67,9 +72,10 @@ spec: - name: Probe B (decisive) — single hostname-less 80/HTTP listener is rejected description: | - A single tenant listener cannot collide with itself. Its rejection with - "must be unique" proves the injected default-http (80/HTTP, no hostname) - is what duplicates the tuple. + A single tenant listener cannot collide with itself. It is still + rejected — now by NSO's defaulting webhook with a clear hostname-required + message, because the injected default-http (80/HTTP, no hostname) would + otherwise duplicate the tuple. try: - script: cluster: nso-standard @@ -95,14 +101,14 @@ spec: echo "--- apply output (rc=$rc) ---" echo "$out" if [ "$rc" -eq 0 ]; then - echo "FAIL: hostname-less listener was admitted; issue #219 not reproduced" + echo "FAIL: hostname-less listener was admitted; issue #219 not fixed correctly" exit 1 fi - echo "$out" | grep -q "Combination of port, protocol and hostname must be unique for each listener" || { - echo "FAIL: rejected, but not with the misleading 'must be unique' CEL message" + echo "$out" | grep -q "a hostname must be set" || { + echo "FAIL: rejected, but not with the clear hostname-required message (got the opaque CEL error?)" exit 1 } - echo "OK: probe B reproduced the misleading rejection" + echo "OK: probe B rejected with the clear hostname-required message" - name: Probe A — two hostname-less listeners (80/HTTP + 443/HTTPS) are rejected try: @@ -137,14 +143,14 @@ spec: echo "--- apply output (rc=$rc) ---" echo "$out" if [ "$rc" -eq 0 ]; then - echo "FAIL: hostname-less listeners were admitted; issue #219 not reproduced" + echo "FAIL: hostname-less listeners were admitted; issue #219 not fixed correctly" exit 1 fi - echo "$out" | grep -q "Combination of port, protocol and hostname must be unique for each listener" || { - echo "FAIL: rejected, but not with the misleading 'must be unique' CEL message" + echo "$out" | grep -q "a hostname must be set" || { + echo "FAIL: rejected, but not with the clear hostname-required message (got the opaque CEL error?)" exit 1 } - echo "OK: probe A reproduced the misleading rejection" + echo "OK: probe A rejected with the clear hostname-required message" - name: Probe D (control) — single 80/HTTP listener WITH a hostname is accepted description: |