Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,7 @@ func (cs *APIServerControllerSet) WithRevisionController(
secretGetter,
cs.eventRecorder,
nil,
nil,
)
return cs
}
Expand Down
30 changes: 25 additions & 5 deletions pkg/operator/revisioncontroller/revision_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ type RevisionController struct {
configMapGetter corev1client.ConfigMapsGetter
secretGetter corev1client.SecretsGetter

revisionPrecondition PreconditionFunc
revisionPrecondition PreconditionFunc
revisionReadinessCheck ReadinessCheckFunc
}

type RevisionResource struct {
Expand All @@ -50,6 +51,9 @@ type RevisionResource struct {
// PreconditionFunc checks if revision precondition is met (is true) and then proceeeds with the creation of new revision
type PreconditionFunc func(ctx context.Context) (bool, error)

// ReadinessCheckFunc validates the assembled revision data after all resources have been copied but before the revision is marked as ready.
type ReadinessCheckFunc func(ctx context.Context, revision int32) (bool, error)

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just realized that Postcondition might not be the best name because the check is done before the operator conditions are set, right before the revision is marked as ready, not after the condition is set. Maybe RevisionReadinessCheck is a better name?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I went ahead and renamed it

// NewRevisionController create a new revision controller.
func NewRevisionController(
instanceName string,
Expand All @@ -62,23 +66,30 @@ func NewRevisionController(
secretGetter corev1client.SecretsGetter,
eventRecorder events.Recorder,
revisionPrecondition PreconditionFunc,
revisionReadinessCheck ReadinessCheckFunc,
) factory.Controller {
if revisionPrecondition == nil {
revisionPrecondition = func(ctx context.Context) (bool, error) {
return true, nil
}
}
if revisionReadinessCheck == nil {
revisionReadinessCheck = func(_ context.Context, _ int32) (bool, error) {
return true, nil
}
}

c := &RevisionController{
controllerInstanceName: factory.ControllerInstanceName(instanceName, "RevisionController"),
targetNamespace: targetNamespace,
configMaps: configMaps,
secrets: secrets,

operatorClient: operatorClient,
configMapGetter: configMapGetter,
secretGetter: secretGetter,
revisionPrecondition: revisionPrecondition,
operatorClient: operatorClient,
configMapGetter: configMapGetter,
secretGetter: secretGetter,
revisionPrecondition: revisionPrecondition,
revisionReadinessCheck: revisionReadinessCheck,
}

return factory.New().
Expand Down Expand Up @@ -309,6 +320,15 @@ func (c RevisionController) createNewRevision(ctx context.Context, recorder even
}
}

// We run the revision readiness check function here because at this point in time, the revision
// ConfigMap is already created, but it doesn't yet have the "revision-ready: true" annotation.
// This means that the installer pod has not yet started deploying the revision.
if ready, err := c.revisionReadinessCheck(ctx, revision); err != nil {
return false, fmt.Errorf("revision readiness check failed for revision %d: %w", revision, err)
} else if !ready {
return false, nil
}

if createdStatus.Annotations == nil {
createdStatus.Annotations = map[string]string{}
}
Expand Down
122 changes: 121 additions & 1 deletion pkg/operator/revisioncontroller/revision_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@ package revisioncontroller
import (
"context"
"fmt"
clocktesting "k8s.io/utils/clock/testing"
"reflect"
"strings"
"testing"
"time"

clocktesting "k8s.io/utils/clock/testing"

"github.com/openshift/library-go/pkg/controller/factory"
"github.com/openshift/library-go/pkg/operator/v1helpers"
"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -541,6 +542,7 @@ func TestRevisionController(t *testing.T) {
kubeClient.CoreV1(),
eventRecorder,
nil,
nil,
)
syncErr := c.Sync(context.TODO(), factory.NewSyncContext("RevisionController", eventRecorder))
if tc.validateStatus != nil {
Expand Down Expand Up @@ -620,6 +622,7 @@ func TestRevisionControllerRevisionCreatedFailedStatusUpdate(t *testing.T) {
kubeClient.CoreV1(),
eventRecorder,
nil,
nil,
)

klog.Infof("Running NewRevisionController.Sync with UpdateLatestRevisionOperatorStatus returning an error")
Expand Down Expand Up @@ -816,6 +819,7 @@ func TestSyncWithRevisionPrecondition(t *testing.T) {
kubeClient.CoreV1(),
eventRecorder,
tc.revisionPrecondition,
nil,
)
syncErr := c.Sync(context.TODO(), factory.NewSyncContext("RevisionController", eventRecorder))
require.Equal(t, syncErr, tc.expSyncErr)
Expand All @@ -826,3 +830,119 @@ func TestSyncWithRevisionPrecondition(t *testing.T) {
})
}
}

func TestSyncWithRevisionReadinessCheck(t *testing.T) {
startingObjects := []runtime.Object{
&v1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "test-secret", Namespace: targetNamespace}},
&v1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "test-config", Namespace: targetNamespace}},
&v1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "revision-status", Namespace: targetNamespace}},
&v1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{Name: "revision-status-1", Namespace: targetNamespace},
Data: map[string]string{"revision": "1"},
},
}
newStaticPodOperatorClient := func() v1helpers.StaticPodOperatorClient {
return v1helpers.NewFakeStaticPodOperatorClient(
&operatorv1.StaticPodOperatorSpec{
OperatorSpec: operatorv1.OperatorSpec{
ManagementState: operatorv1.Managed,
},
},
&operatorv1.StaticPodOperatorStatus{
OperatorStatus: operatorv1.OperatorStatus{
LatestAvailableRevision: 1,
},
NodeStatuses: []operatorv1.NodeStatus{
{
NodeName: "test-node-1",
CurrentRevision: 1,
TargetRevision: 0,
},
},
},
nil,
nil,
)
}

tests := []struct {
testName string
revisionReadinessCheck ReadinessCheckFunc
expSyncErr error
expUpdatedLatestAvailableRevision int32
expRevisionReady string
}{
{
// when readiness check is nil, the default implementation is considered. In this case no error is expected to be
// returned by sync, LatestAvailableRevision should be updated, and the revision should be marked as ready
testName: "readiness check is not supplied",
revisionReadinessCheck: nil,
expSyncErr: nil,
expUpdatedLatestAvailableRevision: 2,
expRevisionReady: "true",
},
{
// when readiness check passes, no error is expected to be returned by sync,
// LatestAvailableRevision should be updated, and the revision should be marked as ready
testName: "readiness check passes",
revisionReadinessCheck: func(ctx context.Context, revision int32) (bool, error) {
return true, nil
},
expSyncErr: nil,
expUpdatedLatestAvailableRevision: 2,
expRevisionReady: "true",
},
{
// when readiness check fails, no error should be returned by sync, LatestAvailableRevision should not be updated,
// and the revision should not be marked as ready even though resources were copied
testName: "readiness check fails",
revisionReadinessCheck: func(ctx context.Context, revision int32) (bool, error) {
return false, nil
},
expSyncErr: nil,
expUpdatedLatestAvailableRevision: 1,
expRevisionReady: "false",
},
{
// when readiness check returns error, a wrapped error should be returned by sync, LatestAvailableRevision
// should not be updated, and the revision should not be marked as ready even though resources were copied
testName: "readiness check returns error",
revisionReadinessCheck: func(ctx context.Context, revision int32) (bool, error) {
return true, fmt.Errorf("Error")
},
expSyncErr: fmt.Errorf("revision readiness check failed for revision 2: %w", fmt.Errorf("Error")),
expUpdatedLatestAvailableRevision: 1,
expRevisionReady: "false",
},
}
for _, tc := range tests {
t.Run(tc.testName, func(t *testing.T) {
staticPodOperatorClient := newStaticPodOperatorClient()
kubeClient := fake.NewClientset(startingObjects...)
eventRecorder := events.NewRecorder(kubeClient.CoreV1().Events("test"), "test-operator", &v1.ObjectReference{}, clocktesting.NewFakePassiveClock(time.Now()))

c := NewRevisionController(
"testing",
targetNamespace,
[]RevisionResource{{Name: "test-config"}},
[]RevisionResource{{Name: "test-secret"}},
informers.NewSharedInformerFactoryWithOptions(kubeClient, 1*time.Minute, informers.WithNamespace(targetNamespace)),
staticPodOperatorClient,
kubeClient.CoreV1(),
kubeClient.CoreV1(),
eventRecorder,
nil,
tc.revisionReadinessCheck,
)
syncErr := c.Sync(context.TODO(), factory.NewSyncContext("RevisionController", eventRecorder))
require.Equal(t, tc.expSyncErr, syncErr)

_, status, _, _ := staticPodOperatorClient.GetStaticPodOperatorState()
require.Equal(t, tc.expUpdatedLatestAvailableRevision, status.LatestAvailableRevision)
Comment on lines +940 to +941

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Handle GetStaticPodOperatorState error before asserting on status.

The error return is currently discarded, which can hide state-fetch failures and make the assertion run on invalid data.

As per coding guidelines, "Never ignore error returns".

✅ Suggested fix
-			_, status, _, _ := staticPodOperatorClient.GetStaticPodOperatorState()
+			_, status, _, statusErr := staticPodOperatorClient.GetStaticPodOperatorState()
+			require.NoError(t, statusErr)
 			require.Equal(t, tc.expUpdatedLatestAvailableRevision, status.LatestAvailableRevision)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
_, status, _, _ := staticPodOperatorClient.GetStaticPodOperatorState()
require.Equal(t, tc.expUpdatedLatestAvailableRevision, status.LatestAvailableRevision)
_, status, _, statusErr := staticPodOperatorClient.GetStaticPodOperatorState()
require.NoError(t, statusErr)
require.Equal(t, tc.expUpdatedLatestAvailableRevision, status.LatestAvailableRevision)
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@pkg/operator/revisioncontroller/revision_controller_test.go` around lines 940
- 941, The error return value from the GetStaticPodOperatorState() call on
staticPodOperatorClient is being discarded with an underscore, which can mask
state-fetch failures and cause the status assertion to run on invalid data.
Capture the error return value instead of ignoring it, and add a
require.NoError() assertion immediately after the GetStaticPodOperatorState()
call to ensure the state was successfully fetched before proceeding with the
status assertion.

Source: Coding guidelines


revisionStatus, err := kubeClient.CoreV1().ConfigMaps(targetNamespace).Get(context.TODO(), "revision-status-2", metav1.GetOptions{})
require.NoError(t, err, "revision-status-2 configmap should exist")
require.Equal(t, tc.expRevisionReady, revisionStatus.Annotations["operator.openshift.io/revision-ready"])
})
}
}
10 changes: 9 additions & 1 deletion pkg/operator/staticpod/controllers.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ type staticPodOperatorControllerBuilder struct {
pdbUnhealthyPodEvictionPolicy *v1.UnhealthyPodEvictionPolicyType
guardCreateConditionalFunc func() (bool, bool, error)

revisionControllerPrecondition revisioncontroller.PreconditionFunc
revisionControllerPrecondition revisioncontroller.PreconditionFunc
revisionControllerReadinessCheck revisioncontroller.ReadinessCheckFunc

extraNodeSelector labels.Selector
}
Expand Down Expand Up @@ -131,6 +132,7 @@ type Builder interface {
// Use this with caution, as this option can disrupt perspective pods that have not yet had a chance to become healthy.
WithPodDisruptionBudgetGuard(operatorNamespace, operatorName, readyzPort, readyzEndpoint string, pdbUnhealthyPodEvictionPolicy *v1.UnhealthyPodEvictionPolicyType, createConditionalFunc func() (bool, bool, error)) Builder
WithRevisionControllerPrecondition(revisionControllerPrecondition revisioncontroller.PreconditionFunc) Builder
WithRevisionControllerReadinessCheck(revisionControllerReadinessCheck revisioncontroller.ReadinessCheckFunc) Builder
ToControllers() (manager.ControllerManager, error)
}

Expand Down Expand Up @@ -223,6 +225,11 @@ func (b *staticPodOperatorControllerBuilder) WithRevisionControllerPrecondition(
return b
}

func (b *staticPodOperatorControllerBuilder) WithRevisionControllerReadinessCheck(revisionControllerReadinessCheck revisioncontroller.ReadinessCheckFunc) Builder {
b.revisionControllerReadinessCheck = revisionControllerReadinessCheck
return b
}

func (b *staticPodOperatorControllerBuilder) ToControllers() (manager.ControllerManager, error) {
manager := manager.NewControllerManager()

Expand Down Expand Up @@ -261,6 +268,7 @@ func (b *staticPodOperatorControllerBuilder) ToControllers() (manager.Controller
secretClient,
eventRecorder,
b.revisionControllerPrecondition,
b.revisionControllerReadinessCheck,
), 1)
} else {
errs = append(errs, fmt.Errorf("missing revisionController; cannot proceed"))
Expand Down