From ae223a7576f1f9636ec43827b3563e4ae98b2326 Mon Sep 17 00:00:00 2001 From: Fabio Bertinatto Date: Thu, 23 Apr 2026 07:19:50 -0400 Subject: [PATCH] kms: add post-check hook to revision controller The idea is to validate revision data before marking ready --- .../controllerset/apiservercontrollerset.go | 1 + .../revisioncontroller/revision_controller.go | 30 ++++- .../revision_controller_test.go | 122 +++++++++++++++++- pkg/operator/staticpod/controllers.go | 10 +- 4 files changed, 156 insertions(+), 7 deletions(-) diff --git a/pkg/operator/apiserver/controllerset/apiservercontrollerset.go b/pkg/operator/apiserver/controllerset/apiservercontrollerset.go index 938fd965f7..bf551ac6d7 100644 --- a/pkg/operator/apiserver/controllerset/apiservercontrollerset.go +++ b/pkg/operator/apiserver/controllerset/apiservercontrollerset.go @@ -323,6 +323,7 @@ func (cs *APIServerControllerSet) WithRevisionController( secretGetter, cs.eventRecorder, nil, + nil, ) return cs } diff --git a/pkg/operator/revisioncontroller/revision_controller.go b/pkg/operator/revisioncontroller/revision_controller.go index ed2a96c3d2..824b3b8a78 100644 --- a/pkg/operator/revisioncontroller/revision_controller.go +++ b/pkg/operator/revisioncontroller/revision_controller.go @@ -39,7 +39,8 @@ type RevisionController struct { configMapGetter corev1client.ConfigMapsGetter secretGetter corev1client.SecretsGetter - revisionPrecondition PreconditionFunc + revisionPrecondition PreconditionFunc + revisionReadinessCheck ReadinessCheckFunc } type RevisionResource struct { @@ -50,6 +51,9 @@ type RevisionResource struct { // PreconditionFunc checks if revision precondition is met (is true) and then proceeeds with the creation of new revision type PreconditionFunc func(ctx context.Context) (bool, error) +// ReadinessCheckFunc validates the assembled revision data after all resources have been copied but before the revision is marked as ready. +type ReadinessCheckFunc func(ctx context.Context, revision int32) (bool, error) + // NewRevisionController create a new revision controller. func NewRevisionController( instanceName string, @@ -62,12 +66,18 @@ func NewRevisionController( secretGetter corev1client.SecretsGetter, eventRecorder events.Recorder, revisionPrecondition PreconditionFunc, + revisionReadinessCheck ReadinessCheckFunc, ) factory.Controller { if revisionPrecondition == nil { revisionPrecondition = func(ctx context.Context) (bool, error) { return true, nil } } + if revisionReadinessCheck == nil { + revisionReadinessCheck = func(_ context.Context, _ int32) (bool, error) { + return true, nil + } + } c := &RevisionController{ controllerInstanceName: factory.ControllerInstanceName(instanceName, "RevisionController"), @@ -75,10 +85,11 @@ func NewRevisionController( configMaps: configMaps, secrets: secrets, - operatorClient: operatorClient, - configMapGetter: configMapGetter, - secretGetter: secretGetter, - revisionPrecondition: revisionPrecondition, + operatorClient: operatorClient, + configMapGetter: configMapGetter, + secretGetter: secretGetter, + revisionPrecondition: revisionPrecondition, + revisionReadinessCheck: revisionReadinessCheck, } return factory.New(). @@ -309,6 +320,15 @@ func (c RevisionController) createNewRevision(ctx context.Context, recorder even } } + // We run the revision readiness check function here because at this point in time, the revision + // ConfigMap is already created, but it doesn't yet have the "revision-ready: true" annotation. + // This means that the installer pod has not yet started deploying the revision. + if ready, err := c.revisionReadinessCheck(ctx, revision); err != nil { + return false, fmt.Errorf("revision readiness check failed for revision %d: %w", revision, err) + } else if !ready { + return false, nil + } + if createdStatus.Annotations == nil { createdStatus.Annotations = map[string]string{} } diff --git a/pkg/operator/revisioncontroller/revision_controller_test.go b/pkg/operator/revisioncontroller/revision_controller_test.go index 2968ca996f..8e076c9dea 100644 --- a/pkg/operator/revisioncontroller/revision_controller_test.go +++ b/pkg/operator/revisioncontroller/revision_controller_test.go @@ -3,12 +3,13 @@ package revisioncontroller import ( "context" "fmt" - clocktesting "k8s.io/utils/clock/testing" "reflect" "strings" "testing" "time" + clocktesting "k8s.io/utils/clock/testing" + "github.com/openshift/library-go/pkg/controller/factory" "github.com/openshift/library-go/pkg/operator/v1helpers" "github.com/stretchr/testify/require" @@ -541,6 +542,7 @@ func TestRevisionController(t *testing.T) { kubeClient.CoreV1(), eventRecorder, nil, + nil, ) syncErr := c.Sync(context.TODO(), factory.NewSyncContext("RevisionController", eventRecorder)) if tc.validateStatus != nil { @@ -620,6 +622,7 @@ func TestRevisionControllerRevisionCreatedFailedStatusUpdate(t *testing.T) { kubeClient.CoreV1(), eventRecorder, nil, + nil, ) klog.Infof("Running NewRevisionController.Sync with UpdateLatestRevisionOperatorStatus returning an error") @@ -816,6 +819,7 @@ func TestSyncWithRevisionPrecondition(t *testing.T) { kubeClient.CoreV1(), eventRecorder, tc.revisionPrecondition, + nil, ) syncErr := c.Sync(context.TODO(), factory.NewSyncContext("RevisionController", eventRecorder)) require.Equal(t, syncErr, tc.expSyncErr) @@ -826,3 +830,119 @@ func TestSyncWithRevisionPrecondition(t *testing.T) { }) } } + +func TestSyncWithRevisionReadinessCheck(t *testing.T) { + startingObjects := []runtime.Object{ + &v1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "test-secret", Namespace: targetNamespace}}, + &v1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "test-config", Namespace: targetNamespace}}, + &v1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "revision-status", Namespace: targetNamespace}}, + &v1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: "revision-status-1", Namespace: targetNamespace}, + Data: map[string]string{"revision": "1"}, + }, + } + newStaticPodOperatorClient := func() v1helpers.StaticPodOperatorClient { + return v1helpers.NewFakeStaticPodOperatorClient( + &operatorv1.StaticPodOperatorSpec{ + OperatorSpec: operatorv1.OperatorSpec{ + ManagementState: operatorv1.Managed, + }, + }, + &operatorv1.StaticPodOperatorStatus{ + OperatorStatus: operatorv1.OperatorStatus{ + LatestAvailableRevision: 1, + }, + NodeStatuses: []operatorv1.NodeStatus{ + { + NodeName: "test-node-1", + CurrentRevision: 1, + TargetRevision: 0, + }, + }, + }, + nil, + nil, + ) + } + + tests := []struct { + testName string + revisionReadinessCheck ReadinessCheckFunc + expSyncErr error + expUpdatedLatestAvailableRevision int32 + expRevisionReady string + }{ + { + // when readiness check is nil, the default implementation is considered. In this case no error is expected to be + // returned by sync, LatestAvailableRevision should be updated, and the revision should be marked as ready + testName: "readiness check is not supplied", + revisionReadinessCheck: nil, + expSyncErr: nil, + expUpdatedLatestAvailableRevision: 2, + expRevisionReady: "true", + }, + { + // when readiness check passes, no error is expected to be returned by sync, + // LatestAvailableRevision should be updated, and the revision should be marked as ready + testName: "readiness check passes", + revisionReadinessCheck: func(ctx context.Context, revision int32) (bool, error) { + return true, nil + }, + expSyncErr: nil, + expUpdatedLatestAvailableRevision: 2, + expRevisionReady: "true", + }, + { + // when readiness check fails, no error should be returned by sync, LatestAvailableRevision should not be updated, + // and the revision should not be marked as ready even though resources were copied + testName: "readiness check fails", + revisionReadinessCheck: func(ctx context.Context, revision int32) (bool, error) { + return false, nil + }, + expSyncErr: nil, + expUpdatedLatestAvailableRevision: 1, + expRevisionReady: "false", + }, + { + // when readiness check returns error, a wrapped error should be returned by sync, LatestAvailableRevision + // should not be updated, and the revision should not be marked as ready even though resources were copied + testName: "readiness check returns error", + revisionReadinessCheck: func(ctx context.Context, revision int32) (bool, error) { + return true, fmt.Errorf("Error") + }, + expSyncErr: fmt.Errorf("revision readiness check failed for revision 2: %w", fmt.Errorf("Error")), + expUpdatedLatestAvailableRevision: 1, + expRevisionReady: "false", + }, + } + for _, tc := range tests { + t.Run(tc.testName, func(t *testing.T) { + staticPodOperatorClient := newStaticPodOperatorClient() + kubeClient := fake.NewClientset(startingObjects...) + eventRecorder := events.NewRecorder(kubeClient.CoreV1().Events("test"), "test-operator", &v1.ObjectReference{}, clocktesting.NewFakePassiveClock(time.Now())) + + c := NewRevisionController( + "testing", + targetNamespace, + []RevisionResource{{Name: "test-config"}}, + []RevisionResource{{Name: "test-secret"}}, + informers.NewSharedInformerFactoryWithOptions(kubeClient, 1*time.Minute, informers.WithNamespace(targetNamespace)), + staticPodOperatorClient, + kubeClient.CoreV1(), + kubeClient.CoreV1(), + eventRecorder, + nil, + tc.revisionReadinessCheck, + ) + syncErr := c.Sync(context.TODO(), factory.NewSyncContext("RevisionController", eventRecorder)) + require.Equal(t, tc.expSyncErr, syncErr) + + _, status, _, _ := staticPodOperatorClient.GetStaticPodOperatorState() + require.Equal(t, tc.expUpdatedLatestAvailableRevision, status.LatestAvailableRevision) + + revisionStatus, err := kubeClient.CoreV1().ConfigMaps(targetNamespace).Get(context.TODO(), "revision-status-2", metav1.GetOptions{}) + require.NoError(t, err, "revision-status-2 configmap should exist") + require.Equal(t, tc.expRevisionReady, revisionStatus.Annotations["operator.openshift.io/revision-ready"]) + }) + } +} diff --git a/pkg/operator/staticpod/controllers.go b/pkg/operator/staticpod/controllers.go index dc824715f1..62722c66c3 100644 --- a/pkg/operator/staticpod/controllers.go +++ b/pkg/operator/staticpod/controllers.go @@ -81,7 +81,8 @@ type staticPodOperatorControllerBuilder struct { pdbUnhealthyPodEvictionPolicy *v1.UnhealthyPodEvictionPolicyType guardCreateConditionalFunc func() (bool, bool, error) - revisionControllerPrecondition revisioncontroller.PreconditionFunc + revisionControllerPrecondition revisioncontroller.PreconditionFunc + revisionControllerReadinessCheck revisioncontroller.ReadinessCheckFunc extraNodeSelector labels.Selector } @@ -131,6 +132,7 @@ type Builder interface { // Use this with caution, as this option can disrupt perspective pods that have not yet had a chance to become healthy. WithPodDisruptionBudgetGuard(operatorNamespace, operatorName, readyzPort, readyzEndpoint string, pdbUnhealthyPodEvictionPolicy *v1.UnhealthyPodEvictionPolicyType, createConditionalFunc func() (bool, bool, error)) Builder WithRevisionControllerPrecondition(revisionControllerPrecondition revisioncontroller.PreconditionFunc) Builder + WithRevisionControllerReadinessCheck(revisionControllerReadinessCheck revisioncontroller.ReadinessCheckFunc) Builder ToControllers() (manager.ControllerManager, error) } @@ -223,6 +225,11 @@ func (b *staticPodOperatorControllerBuilder) WithRevisionControllerPrecondition( return b } +func (b *staticPodOperatorControllerBuilder) WithRevisionControllerReadinessCheck(revisionControllerReadinessCheck revisioncontroller.ReadinessCheckFunc) Builder { + b.revisionControllerReadinessCheck = revisionControllerReadinessCheck + return b +} + func (b *staticPodOperatorControllerBuilder) ToControllers() (manager.ControllerManager, error) { manager := manager.NewControllerManager() @@ -261,6 +268,7 @@ func (b *staticPodOperatorControllerBuilder) ToControllers() (manager.Controller secretClient, eventRecorder, b.revisionControllerPrecondition, + b.revisionControllerReadinessCheck, ), 1) } else { errs = append(errs, fmt.Errorf("missing revisionController; cannot proceed"))