From 8250f57558aa8461b7f157b6f16afb54ad75633e Mon Sep 17 00:00:00 2001 From: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> Date: Wed, 20 May 2026 00:53:26 +0800 Subject: [PATCH 01/22] feat: add deployment MinReady strategy Signed-off-by: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> --- CHANGELOG.md | 4 + api/v1alpha1/batchrelease_plan_types.go | 5 + api/v1alpha1/conversion.go | 24 +- api/v1alpha1/rollout_conversion_test.go | 94 ++++++ api/v1alpha1/rollout_types.go | 23 ++ api/v1beta1/batchrelease_plan_types.go | 5 + api/v1beta1/rollout_types.go | 23 ++ .../rollouts.kruise.io_batchreleases.yaml | 16 + .../bases/rollouts.kruise.io_rollouts.yaml | 16 + .../deployment_min_ready_quickstart.md | 156 +++++++++ docs/migration/recreate_to_minready.md | 83 +++++ docs/operator/deployment_min_ready_runbook.md | 134 ++++++++ .../batchrelease_controller_test.go | 165 ++++++++++ .../batchrelease/batchrelease_executor.go | 4 + .../control/partitionstyle/control_plane.go | 38 ++- .../partitionstyle/deployment/control.go | 4 + .../deployment/minready_constants.go | 117 +++++++ .../deployment/minready_control.go | 297 ++++++++++++++++++ .../deployment/minready_control_test.go | 263 ++++++++++++++++ .../deployment/minready_finalize_test.go | 128 ++++++++ .../partitionstyle/deployment/minready_pdb.go | 51 +++ .../deployment/minready_test_helpers_test.go | 115 +++++++ .../control/partitionstyle/minready_status.go | 126 ++++++++ .../partitionstyle/minready_status_test.go | 159 ++++++++++ .../batchrelease/metrics/minready_metrics.go | 115 +++++++ .../metrics/minready_metrics_test.go | 85 +++++ pkg/controller/rollout/rollout_canary.go | 1 + .../rollout/rollout_releaseManager_test.go | 20 ++ pkg/feature/rollout_features.go | 3 + pkg/feature/rollout_features_test.go | 29 ++ .../mutating/workload_update_handler.go | 17 +- .../mutating/workload_update_handler_test.go | 113 +++++++ test/e2e/deployment_minready_actions_test.go | 140 +++++++++ 
test/e2e/deployment_minready_helpers_test.go | 220 +++++++++++++ test/e2e/deployment_minready_pdb_test.go | 73 +++++ ...ployment_minready_scenarios_helper_test.go | 143 +++++++++ test/e2e/deployment_minready_test.go | 136 ++++++++ test/integration/concurrency_test.go | 111 +++++++ test/integration/deployment_minready_test.go | 169 ++++++++++ test/integration/minready_helpers_test.go | 236 ++++++++++++++ 40 files changed, 3645 insertions(+), 16 deletions(-) create mode 100644 api/v1alpha1/rollout_conversion_test.go create mode 100644 docs/getting_started/deployment_min_ready_quickstart.md create mode 100644 docs/migration/recreate_to_minready.md create mode 100644 docs/operator/deployment_min_ready_runbook.md create mode 100644 pkg/controller/batchrelease/control/partitionstyle/deployment/minready_constants.go create mode 100644 pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go create mode 100644 pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go create mode 100644 pkg/controller/batchrelease/control/partitionstyle/deployment/minready_finalize_test.go create mode 100644 pkg/controller/batchrelease/control/partitionstyle/deployment/minready_pdb.go create mode 100644 pkg/controller/batchrelease/control/partitionstyle/deployment/minready_test_helpers_test.go create mode 100644 pkg/controller/batchrelease/control/partitionstyle/minready_status.go create mode 100644 pkg/controller/batchrelease/control/partitionstyle/minready_status_test.go create mode 100644 pkg/controller/batchrelease/metrics/minready_metrics.go create mode 100644 pkg/controller/batchrelease/metrics/minready_metrics_test.go create mode 100644 pkg/feature/rollout_features_test.go create mode 100644 test/e2e/deployment_minready_actions_test.go create mode 100644 test/e2e/deployment_minready_helpers_test.go create mode 100644 test/e2e/deployment_minready_pdb_test.go create mode 100644 test/e2e/deployment_minready_scenarios_helper_test.go 
create mode 100644 test/e2e/deployment_minready_test.go create mode 100644 test/integration/concurrency_test.go create mode 100644 test/integration/deployment_minready_test.go create mode 100644 test/integration/minready_helpers_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 684878e7..80ab88f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Change Log +## Unreleased +### Key Features: +- Added the alpha Deployment `MinReadySeconds` rollout strategy, including API fields, feature gate, controller routing, observability, documentation, and tests. + ## v0.6.2 ### Bugfix: - Fixed issue where partition deployments got stuck. ([#307](https://github.com/openkruise/rollouts/pull/307),[@AiRanthem](https://github.com/AiRanthem)) diff --git a/api/v1alpha1/batchrelease_plan_types.go b/api/v1alpha1/batchrelease_plan_types.go index b947050a..4ac41599 100644 --- a/api/v1alpha1/batchrelease_plan_types.go +++ b/api/v1alpha1/batchrelease_plan_types.go @@ -56,6 +56,11 @@ type ReleasePlan struct { PatchPodTemplateMetadata *PatchPodTemplateMetadata `json:"patchPodTemplateMetadata,omitempty"` // RollingStyle can be "Canary", "Partiton" or "BlueGreen" RollingStyle RollingStyleType `json:"rollingStyle,omitempty"` + // DeploymentStrategy controls how native Deployment workloads are advanced. + // Empty means Recreate for backward compatibility. + // +kubebuilder:validation:Enum=Recreate;MinReadySeconds + // +optional + DeploymentStrategy DeploymentStrategyType `json:"deploymentStrategy,omitempty"` // EnableExtraWorkloadForCanary indicates whether to create extra workload for canary // True corresponds to RollingStyle "Canary". // False corresponds to RollingStyle "Partiton". 
diff --git a/api/v1alpha1/conversion.go b/api/v1alpha1/conversion.go index 555c8e5c..b6a5ed5d 100644 --- a/api/v1alpha1/conversion.go +++ b/api/v1alpha1/conversion.go @@ -44,7 +44,8 @@ func (src *Rollout) ConvertTo(dst conversion.Hub) error { obj.Spec.Strategy = v1beta1.RolloutStrategy{ Paused: srcSpec.Strategy.Paused, Canary: &v1beta1.CanaryStrategy{ - FailureThreshold: srcSpec.Strategy.Canary.FailureThreshold, + DeploymentStrategy: v1beta1.DeploymentStrategyType(srcSpec.Strategy.Canary.DeploymentStrategy), + FailureThreshold: srcSpec.Strategy.Canary.FailureThreshold, }, } for _, step := range srcSpec.Strategy.Canary.Steps { @@ -189,7 +190,8 @@ func (dst *Rollout) ConvertFrom(src conversion.Hub) error { Strategy: RolloutStrategy{ Paused: srcV1beta1.Spec.Strategy.Paused, Canary: &CanaryStrategy{ - FailureThreshold: srcV1beta1.Spec.Strategy.Canary.FailureThreshold, + DeploymentStrategy: DeploymentStrategyType(srcV1beta1.Spec.Strategy.Canary.DeploymentStrategy), + FailureThreshold: srcV1beta1.Spec.Strategy.Canary.FailureThreshold, }, }, Disabled: srcV1beta1.Spec.Disabled, @@ -326,10 +328,11 @@ func (src *BatchRelease) ConvertTo(dst conversion.Hub) error { Name: srcSpec.TargetRef.WorkloadRef.Name, } obj.Spec.ReleasePlan = v1beta1.ReleasePlan{ - BatchPartition: srcSpec.ReleasePlan.BatchPartition, - RolloutID: srcSpec.ReleasePlan.RolloutID, - FailureThreshold: srcSpec.ReleasePlan.FailureThreshold, - FinalizingPolicy: v1beta1.FinalizingPolicyType(srcSpec.ReleasePlan.FinalizingPolicy), + BatchPartition: srcSpec.ReleasePlan.BatchPartition, + RolloutID: srcSpec.ReleasePlan.RolloutID, + FailureThreshold: srcSpec.ReleasePlan.FailureThreshold, + FinalizingPolicy: v1beta1.FinalizingPolicyType(srcSpec.ReleasePlan.FinalizingPolicy), + DeploymentStrategy: v1beta1.DeploymentStrategyType(srcSpec.ReleasePlan.DeploymentStrategy), } for _, batch := range srcSpec.ReleasePlan.Batches { o := v1beta1.ReleaseBatch{ @@ -411,10 +414,11 @@ func (dst *BatchRelease) ConvertFrom(src 
conversion.Hub) error { Name: srcSpec.WorkloadRef.Name, } dst.Spec.ReleasePlan = ReleasePlan{ - BatchPartition: srcSpec.ReleasePlan.BatchPartition, - RolloutID: srcSpec.ReleasePlan.RolloutID, - FailureThreshold: srcSpec.ReleasePlan.FailureThreshold, - FinalizingPolicy: FinalizingPolicyType(srcSpec.ReleasePlan.FinalizingPolicy), + BatchPartition: srcSpec.ReleasePlan.BatchPartition, + RolloutID: srcSpec.ReleasePlan.RolloutID, + FailureThreshold: srcSpec.ReleasePlan.FailureThreshold, + FinalizingPolicy: FinalizingPolicyType(srcSpec.ReleasePlan.FinalizingPolicy), + DeploymentStrategy: DeploymentStrategyType(srcSpec.ReleasePlan.DeploymentStrategy), } for _, batch := range srcSpec.ReleasePlan.Batches { obj := ReleaseBatch{ diff --git a/api/v1alpha1/rollout_conversion_test.go b/api/v1alpha1/rollout_conversion_test.go new file mode 100644 index 00000000..213ef294 --- /dev/null +++ b/api/v1alpha1/rollout_conversion_test.go @@ -0,0 +1,94 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package v1alpha1 + +import ( + "testing" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/openkruise/rollouts/api/v1beta1" +) + +func TestRoundTripDeploymentStrategyFromV1alpha1(t *testing.T) { + source := &Rollout{ + ObjectMeta: metav1.ObjectMeta{ + Name: "demo", + Annotations: map[string]string{ + RolloutStyleAnnotation: string(PartitionRollingStyle), + }, + }, + Spec: RolloutSpec{ + ObjectRef: ObjectRef{WorkloadRef: &WorkloadRef{ + APIVersion: "apps/v1", + Kind: "Deployment", + Name: "demo", + }}, + Strategy: RolloutStrategy{Canary: &CanaryStrategy{ + DeploymentStrategy: DeploymentStrategyMinReadySeconds, + }}, + }, + } + + hub := &v1beta1.Rollout{} + if err := source.ConvertTo(hub); err != nil { + t.Fatalf("ConvertTo failed: %v", err) + } + if got := hub.Spec.Strategy.Canary.DeploymentStrategy; got != v1beta1.DeploymentStrategyMinReadySeconds { + t.Fatalf("ConvertTo DeploymentStrategy = %q, want %q", got, v1beta1.DeploymentStrategyMinReadySeconds) + } + + roundTripped := &Rollout{} + if err := roundTripped.ConvertFrom(hub); err != nil { + t.Fatalf("ConvertFrom failed: %v", err) + } + if got := roundTripped.Spec.Strategy.Canary.DeploymentStrategy; got != DeploymentStrategyMinReadySeconds { + t.Fatalf("round-trip DeploymentStrategy = %q, want %q", got, DeploymentStrategyMinReadySeconds) + } +} + +func TestRoundTripDeploymentStrategyFromV1beta1(t *testing.T) { + source := &v1beta1.Rollout{ + ObjectMeta: metav1.ObjectMeta{Name: "demo"}, + Spec: v1beta1.RolloutSpec{ + WorkloadRef: v1beta1.ObjectRef{ + APIVersion: "apps/v1", + Kind: "Deployment", + Name: "demo", + }, + Strategy: v1beta1.RolloutStrategy{Canary: &v1beta1.CanaryStrategy{ + DeploymentStrategy: v1beta1.DeploymentStrategyMinReadySeconds, + }}, + }, + } + + spoke := &Rollout{} + if err := spoke.ConvertFrom(source); err != nil { + t.Fatalf("ConvertFrom failed: %v", err) + } + if got := spoke.Spec.Strategy.Canary.DeploymentStrategy; got != DeploymentStrategyMinReadySeconds { + 
t.Fatalf("ConvertFrom DeploymentStrategy = %q, want %q", got, DeploymentStrategyMinReadySeconds) + } + + roundTripped := &v1beta1.Rollout{} + if err := spoke.ConvertTo(roundTripped); err != nil { + t.Fatalf("ConvertTo failed: %v", err) + } + if got := roundTripped.Spec.Strategy.Canary.DeploymentStrategy; got != v1beta1.DeploymentStrategyMinReadySeconds { + t.Fatalf("round-trip DeploymentStrategy = %q, want %q", got, v1beta1.DeploymentStrategyMinReadySeconds) + } +} diff --git a/api/v1alpha1/rollout_types.go b/api/v1alpha1/rollout_types.go index 526c110e..4939164b 100644 --- a/api/v1alpha1/rollout_types.go +++ b/api/v1alpha1/rollout_types.go @@ -105,6 +105,11 @@ type RolloutStrategy struct { // CanaryStrategy defines parameters for a Replica Based Canary type CanaryStrategy struct { + // DeploymentStrategy controls how native Deployment workloads are advanced. + // Empty means Recreate for backward compatibility. + // +kubebuilder:validation:Enum=Recreate;MinReadySeconds + // +optional + DeploymentStrategy DeploymentStrategyType `json:"deploymentStrategy,omitempty"` // Steps define the order of phases to execute release in batches(20%, 40%, 60%, 80%, 100%) // +optional Steps []CanaryStep `json:"steps,omitempty"` @@ -125,6 +130,15 @@ type CanaryStrategy struct { DisableGenerateCanaryService bool `json:"disableGenerateCanaryService,omitempty"` } +type DeploymentStrategyType string + +const ( + // DeploymentStrategyRecreate keeps the existing Recreate-based Deployment rollout behavior. + DeploymentStrategyRecreate DeploymentStrategyType = "Recreate" + // DeploymentStrategyMinReadySeconds uses native RollingUpdate with inflated minReadySeconds. 
+ DeploymentStrategyMinReadySeconds DeploymentStrategyType = "MinReadySeconds" +) + type PatchPodTemplateMetadata struct { // annotations Annotations map[string]string `json:"annotations,omitempty"` @@ -219,6 +233,15 @@ const ( // Terminating Reason TerminatingReasonInTerminating = "InTerminating" TerminatingReasonCompleted = "Completed" + + // RolloutConditionMinReadyInitialized indicates MinReadySeconds strategy initialization has completed. + RolloutConditionMinReadyInitialized RolloutConditionType = "MinReadyInitialized" + // RolloutConditionMinReadyBatching indicates MinReadySeconds strategy batch processing is active. + RolloutConditionMinReadyBatching RolloutConditionType = "MinReadyBatching" + // RolloutConditionMinReadyDegraded indicates MinReadySeconds strategy hit an explicit blocking error. + RolloutConditionMinReadyDegraded RolloutConditionType = "MinReadyDegraded" + // RolloutConditionMinReadyFinalized indicates MinReadySeconds strategy finalization has completed. + RolloutConditionMinReadyFinalized RolloutConditionType = "MinReadyFinalized" ) // CanaryStatus status fields that only pertain to the canary rollout diff --git a/api/v1beta1/batchrelease_plan_types.go b/api/v1beta1/batchrelease_plan_types.go index ddc428a3..250245c1 100644 --- a/api/v1beta1/batchrelease_plan_types.go +++ b/api/v1beta1/batchrelease_plan_types.go @@ -56,6 +56,11 @@ type ReleasePlan struct { PatchPodTemplateMetadata *PatchPodTemplateMetadata `json:"patchPodTemplateMetadata,omitempty"` // RollingStyle can be "Canary", "Partiton" or "BlueGreen" RollingStyle RollingStyleType `json:"rollingStyle,omitempty"` + // DeploymentStrategy controls how native Deployment workloads are advanced. + // Empty means Recreate for backward compatibility. 
+ // +kubebuilder:validation:Enum=Recreate;MinReadySeconds + // +optional + DeploymentStrategy DeploymentStrategyType `json:"deploymentStrategy,omitempty"` // EnableExtraWorkloadForCanary indicates whether to create extra workload for canary // True corresponds to RollingStyle "Canary". // False corresponds to RollingStyle "Partiton". diff --git a/api/v1beta1/rollout_types.go b/api/v1beta1/rollout_types.go index 06b108e6..00d21d42 100644 --- a/api/v1beta1/rollout_types.go +++ b/api/v1beta1/rollout_types.go @@ -192,6 +192,11 @@ type BlueGreenStrategy struct { // CanaryStrategy defines parameters for a Replica Based Canary type CanaryStrategy struct { + // DeploymentStrategy controls how native Deployment workloads are advanced. + // Empty means Recreate for backward compatibility. + // +kubebuilder:validation:Enum=Recreate;MinReadySeconds + // +optional + DeploymentStrategy DeploymentStrategyType `json:"deploymentStrategy,omitempty"` // Steps define the order of phases to execute release in batches(20%, 40%, 60%, 80%, 100%) // +kubebuilder:validation:MaxItems=50 // +optional @@ -219,6 +224,15 @@ type CanaryStrategy struct { DisableGenerateCanaryService bool `json:"disableGenerateCanaryService,omitempty"` } +type DeploymentStrategyType string + +const ( + // DeploymentStrategyRecreate keeps the existing Recreate-based Deployment rollout behavior. + DeploymentStrategyRecreate DeploymentStrategyType = "Recreate" + // DeploymentStrategyMinReadySeconds uses native RollingUpdate with inflated minReadySeconds. + DeploymentStrategyMinReadySeconds DeploymentStrategyType = "MinReadySeconds" +) + type PatchPodTemplateMetadata struct { // annotations Annotations map[string]string `json:"annotations,omitempty"` @@ -384,6 +398,15 @@ const ( TerminatingReasonInTerminating = "InTerminating" TerminatingReasonCompleted = "Completed" + // RolloutConditionMinReadyInitialized indicates MinReadySeconds strategy initialization has completed. 
+ RolloutConditionMinReadyInitialized RolloutConditionType = "MinReadyInitialized" + // RolloutConditionMinReadyBatching indicates MinReadySeconds strategy batch processing is active. + RolloutConditionMinReadyBatching RolloutConditionType = "MinReadyBatching" + // RolloutConditionMinReadyDegraded indicates MinReadySeconds strategy hit an explicit blocking error. + RolloutConditionMinReadyDegraded RolloutConditionType = "MinReadyDegraded" + // RolloutConditionMinReadyFinalized indicates MinReadySeconds strategy finalization has completed. + RolloutConditionMinReadyFinalized RolloutConditionType = "MinReadyFinalized" + // Finalise Reason // Finalise when the last batch is released and all pods will update to new version FinaliseReasonSuccess = "Success" diff --git a/config/crd/bases/rollouts.kruise.io_batchreleases.yaml b/config/crd/bases/rollouts.kruise.io_batchreleases.yaml index fef0369e..a826d67a 100644 --- a/config/crd/bases/rollouts.kruise.io_batchreleases.yaml +++ b/config/crd/bases/rollouts.kruise.io_batchreleases.yaml @@ -93,6 +93,14 @@ spec: - canaryReplicas type: object type: array + deploymentStrategy: + description: |- + DeploymentStrategy controls how native Deployment workloads are advanced. + Empty means Recreate for backward compatibility. + enum: + - Recreate + - MinReadySeconds + type: string enableExtraWorkloadForCanary: description: |- EnableExtraWorkloadForCanary indicates whether to create extra workload for canary @@ -377,6 +385,14 @@ spec: - canaryReplicas type: object type: array + deploymentStrategy: + description: |- + DeploymentStrategy controls how native Deployment workloads are advanced. + Empty means Recreate for backward compatibility. 
+ enum: + - Recreate + - MinReadySeconds + type: string enableExtraWorkloadForCanary: description: |- EnableExtraWorkloadForCanary indicates whether to create extra workload for canary diff --git a/config/crd/bases/rollouts.kruise.io_rollouts.yaml b/config/crd/bases/rollouts.kruise.io_rollouts.yaml index d0dd7d03..b20097b7 100644 --- a/config/crd/bases/rollouts.kruise.io_rollouts.yaml +++ b/config/crd/bases/rollouts.kruise.io_rollouts.yaml @@ -104,6 +104,14 @@ spec: description: CanaryStrategy defines parameters for a Replica Based Canary properties: + deploymentStrategy: + description: |- + DeploymentStrategy controls how native Deployment workloads are advanced. + Empty means Recreate for backward compatibility. + enum: + - Recreate + - MinReadySeconds + type: string disableGenerateCanaryService: description: canary service will not be generated if DisableGenerateCanaryService is true @@ -1182,6 +1190,14 @@ spec: description: CanaryStrategy defines parameters for a Replica Based Canary properties: + deploymentStrategy: + description: |- + DeploymentStrategy controls how native Deployment workloads are advanced. + Empty means Recreate for backward compatibility. + enum: + - Recreate + - MinReadySeconds + type: string disableGenerateCanaryService: description: canary service will not be generated if DisableGenerateCanaryService is true diff --git a/docs/getting_started/deployment_min_ready_quickstart.md b/docs/getting_started/deployment_min_ready_quickstart.md new file mode 100644 index 00000000..4ec4360c --- /dev/null +++ b/docs/getting_started/deployment_min_ready_quickstart.md @@ -0,0 +1,156 @@ +# Deployment MinReadySeconds Quickstart + +This guide shows how to enable the native Deployment MinReadySeconds rollout strategy in Kruise Rollouts. + +## What this strategy does + +MinReadySeconds keeps native Kubernetes `Deployment.spec.strategy.type` unchanged and relies on inflated rollout fields plus Kruise Rollouts orchestration to advance batches. 
It is intended for users who want controlled, batch-based rollout behavior without switching the workload to Recreate. + +The controller writes and later restores these original Deployment fields: + +- `spec.minReadySeconds` +- `spec.progressDeadlineSeconds` +- `spec.strategy.rollingUpdate.maxUnavailable` +- `spec.strategy.rollingUpdate.maxSurge` + +The feature gate is `MinReadySecondsStrategy` and it is disabled by default. + +## When to use + +Use this strategy when: + +- you want native Deployment semantics to stay in place +- you need batch-based rollout control +- you want the controller to restore the original Deployment fields automatically + +Do not use it when: + +- a PodDisruptionBudget covers the target workload +- you need a traffic-routing canary instead of a native Deployment rollout +- you cannot tolerate long Ready-but-not-Available periods during rollout + +## Before you start + +- Kubernetes cluster with Kruise Rollouts installed +- A `Deployment` managed by a `Rollout` +- `MinReadySecondsStrategy=true` enabled in the controller feature gate +- No PodDisruptionBudget covering the target Deployment namespace and selector + +If a matching PDB exists, initialization is rejected and the rollout enters `MinReadyDegraded`. + +## Minimal rollout example + +```yaml +apiVersion: rollouts.kruise.io/v1beta1 +kind: Rollout +metadata: + name: demo-rollout +spec: + strategy: + canary: + deploymentStrategy: MinReadySeconds + steps: + - replicas: 20% + - replicas: 50% + - replicas: 100% +``` + +The associated Deployment should keep a normal RollingUpdate strategy. Kruise Rollouts will inflate the live fields during rollout and restore the original values on finalize. 
+ +Example workload: + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: demo-deploy +spec: + replicas: 3 + selector: + matchLabels: + app: demo + template: + metadata: + labels: + app: demo + spec: + containers: + - name: app + image: nginx:1.27 +``` + +## Enable the feature gate + +Set the controller feature gate to: + +```bash +MinReadySecondsStrategy=true +``` + +Without this gate, the controller rejects MinReadySeconds rollouts and records a warning event. + +## Five minute smoke test + +1. Apply the Rollout and Deployment. +2. Enable `MinReadySecondsStrategy=true` on the controller. +3. Update the Deployment image. +4. Watch the Rollout status and the Deployment annotations. +5. Confirm the rollout eventually reaches `MinReadyFinalized`. + +## Verify the rollout + +After the rollout starts: + +```bash +kubectl get rollout demo-rollout +kubectl get deploy demo-deploy -o yaml +kubectl describe rollout demo-rollout +``` + +Expected behavior: + +- the Deployment gets annotated with the original rollout fields +- `minReadySeconds` is inflated to the MaxReadySeconds value +- `progressDeadlineSeconds` is inflated to the MaxProgressSeconds value +- `maxUnavailable` is driven batch-by-batch +- `maxSurge` is kept at `0` + +## What to look for + +- `MinReadyInitialized` means the Deployment was initialized successfully +- `MinReadyBatching` means batches are progressing +- `MinReadyFinalized` means the original Deployment fields were restored +- `MinReadyDegraded` means the controller hit a blocking condition + +## Recreate comparison + +Compared with the old Recreate-style behavior: + +- MinReadySeconds does not change `Deployment.spec.strategy.type` +- original rollout fields are stored and restored explicitly +- batch progression is driven by readiness on inflated fields +- Recreate-style mutation is skipped when the feature is enabled + +## FAQ + +### How do I enable the feature gate? + +Set `MinReadySecondsStrategy=true` on the controller. 
+ +### Why does the rollout fail immediately? + +The most common reasons are a disabled feature gate or a matching PDB. + +### Can I use this with a Service Mesh? + +Yes, but only if the mesh does not rely on mutating the Deployment strategy type or blocking readiness in a way that conflicts with the inflated rollout fields. + +### Why does Available stay false for a long time? + +That is expected. The strategy intentionally inflates `minReadySeconds` so the controller can control rollout progression by batch. + +## Notes + +- The strategy does not modify `Deployment.spec.strategy.type`. +- PDB-covered workloads are blocked in alpha. +- Existing annotations are treated as live state. Missing or partial original annotations are not considered a success path. diff --git a/docs/migration/recreate_to_minready.md b/docs/migration/recreate_to_minready.md new file mode 100644 index 00000000..968ac4b0 --- /dev/null +++ b/docs/migration/recreate_to_minready.md @@ -0,0 +1,83 @@ +# Migration from Recreate to MinReadySeconds + +This guide explains how to move an existing rollout from the legacy Recreate-based flow to the MinReadySeconds strategy. + +## What changes + +With MinReadySeconds: + +- the Deployment stays on native RollingUpdate +- the controller uses inflated rollout fields instead of switching `strategy.type` +- original rollout fields are stored in annotations and restored later + +This is different from the older Recreate behavior. 
+ +## Behavior comparison + +| Topic | Recreate | MinReadySeconds | +|-------|----------|-----------------| +| `Deployment.spec.strategy.type` | Switched to Recreate during rollout | Left unchanged | +| Rollout control | Full stop-and-replace behavior | Batch-based progression | +| Original fields | Not restored from annotations | Stored and restored | +| PDB compatibility | Depends on workload flow | Blocked in alpha | +| Operational risk | Simpler but more invasive | Less invasive but stricter on readiness | + +## Compatibility checklist + +Before migrating, confirm: + +- the controller feature gate `MinReadySecondsStrategy` is enabled +- the target workload is not covered by a matching PDB +- the rollout spec uses `deploymentStrategy: MinReadySeconds` +- the workload can tolerate long Ready-but-not-Available periods during rollout +- your GitOps tool will not continuously fight the inflated rollout fields +- HPA is either disabled for the rollout or understood well enough to accept batch recalculation + +## Migration steps + +1. Pick a single namespace for the first trial. +2. Update the Rollout spec to use `deploymentStrategy: MinReadySeconds`. +3. Enable the `MinReadySecondsStrategy` feature gate on the controller. +4. Reconcile the rollout once so the controller writes the original annotations. +5. Verify that the live Deployment fields are inflated. +6. Watch the rollout status until `MinReadyFinalized`. +7. Roll the change out to other namespaces only after the first one is stable. + +## Expected controller behavior + +The executor routes MinReadySeconds rollouts to the MinReady controller. +The webhook keeps the Deployment strategy type unchanged for this path. +The controller updates batch state by patching `maxUnavailable` only. 
+ +## Rollout plan for an existing service + +If you are moving a production service: + +- start with one non-critical namespace +- watch events and status conditions during the first rollout +- confirm the Deployment annotations are written and later removed +- verify that your GitOps reconciler is not reverting `maxUnavailable` +- verify that HPA is not introducing surprise replica swings during the test rollout + +## Rollback + +To roll back, switch the Rollout spec back to the Recreate strategy and let the controller reconcile the workload back to its original fields. + +If the rollout is already degraded, resolve the blocking cause first: + +- enable the feature gate if it was disabled +- remove the overlapping PDB +- repair missing or malformed original annotations +- restore live Deployment fields that drifted out of the inflated state + +## Known limitations + +- PDB-covered workloads are blocked in alpha. +- Any direct manual edit of the inflated Deployment fields can move the rollout into `MinReadyDegraded`. +- This migration is only appropriate when you want the controller to preserve the native Deployment strategy type. + +## Notes + +- Do not migrate workloads with a covering PDB unless the strategy is redesigned for that topology. +- Do not change `Deployment.spec.strategy.type` manually during the migration. +- Keep the original annotations intact until finalize completes. diff --git a/docs/operator/deployment_min_ready_runbook.md b/docs/operator/deployment_min_ready_runbook.md new file mode 100644 index 00000000..2c23731d --- /dev/null +++ b/docs/operator/deployment_min_ready_runbook.md @@ -0,0 +1,134 @@ +# Deployment MinReadySeconds Runbook + +This page is for operators who need to inspect, support, or recover a MinReadySeconds rollout. 
+ +## Quick status map + +| Condition | Meaning | Operator action | +|-----------|---------|-----------------| +| `MinReadyInitialized` | Original values are stored and live fields are inflated | Start watching batches | +| `MinReadyBatching` | A batch is in progress or waiting on readiness | Inspect pods and rollout status | +| `MinReadyDegraded` | The controller stopped on an explicit blocking issue | Follow the relevant recovery path | +| `MinReadyFinalized` | The original Deployment fields were restored | No action needed | + +## Normal lifecycle + +1. `Initialize` stores the original Deployment fields in annotations. +2. The controller inflates rollout fields and advances each batch. +3. `Finalize` restores the original fields and removes the annotations. + +The rollout status uses these conditions: + +- `MinReadyInitialized` +- `MinReadyBatching` +- `MinReadyDegraded` +- `MinReadyFinalized` + +## Common events + +- `MinReadyInitialized` +- `MinReadyBatchUpgraded` +- `MinReadyFinalized` +- `MinReadyDegradedMissingAnnotations` +- `MinReadyDegradedDriftDetected` +- `MinReadyDegradedPDBIncompatible` +- `MinReadyFeatureGateDisabled` + +The current implementation does not emit a dedicated `MinReadyBatchStuck` event. Use the `MinReadyBatching` condition together with `rollout_minready_stuck_seconds` to detect long waits. + +## Degraded states + +`MinReadyDegraded` means the controller stopped because the rollout cannot safely continue. 
+ +Typical causes: + +- feature gate disabled +- original annotations missing or partial +- live Deployment fields no longer match the inflated MinReadySeconds state +- PDB selector matches the Deployment pods + +## Troubleshooting matrix + +| Reason | Diagnostic command | What to look for | +|--------|--------------------|------------------| +| Feature gate disabled | `kubectl describe rollout <rollout-name>` | Warning event with `MinReadyFeatureGateDisabled` | +| Missing annotations | `kubectl get deploy <deployment-name> -o yaml` | One or more original annotations absent | +| Drift detected | `kubectl get deploy <deployment-name> -o yaml` | `minReadySeconds`, `progressDeadlineSeconds`, or `maxSurge` no longer match inflated values | +| PDB conflict | `kubectl get pdb -n <namespace> -o yaml` | Selector matches the workload labels | +| Batch waiting too long | `kubectl get rollout <rollout-name> -o yaml` and metrics | `MinReadyBatching` stays true and `rollout_minready_stuck_seconds` remains above zero | + +## PDB incompatibility + +If a matching PodDisruptionBudget exists for the target Deployment, initialization is rejected. + +This is intentional in alpha. Do not try to work around it by manually forcing batch progression. + +## Break-glass flow + +Use this when the rollout is degraded and you need to recover quickly: + +1. Identify the blocking reason from events and conditions. +2. Fix the root cause instead of patching around it. +3. Reconcile the Rollout again. +4. Confirm the rollout returns to `MinReadyInitialized`, `MinReadyBatching`, or `MinReadyFinalized`. + +If the original annotations were removed accidentally, restore them before the next reconcile. Do not write default values by hand unless the original values were truly default. 
+ +## Drift detection + +The controller treats these as drift: + +- `spec.minReadySeconds` no longer equals the inflated value +- `spec.progressDeadlineSeconds` no longer equals the inflated value +- `spec.strategy.rollingUpdate.maxSurge` is no longer the inflated value (`1`) +- original annotations are partially missing + +When drift is detected, the rollout enters `MinReadyDegraded` and emits `MinReadyDegradedDriftDetected`. + +## Inspecting a live rollout + +```bash +kubectl get rollout <rollout-name> -o yaml +kubectl describe rollout <rollout-name> +kubectl get deploy <deployment-name> -o yaml +kubectl get pdb -n <namespace> +``` + +Check these fields: + +- annotations under `metadata.annotations` +- `status.conditions` +- the current batch and replica counts + +## Recovery + +Recovery depends on the cause. + +- If the feature gate was disabled, enable `MinReadySecondsStrategy` and retry the rollout. +- If a PDB matches, remove the PDB or move the workload to a non-overlapping selector. +- If annotations are missing or the live fields drifted, treat the Deployment as damaged and re-create the rollout state from the desired spec. +- If the rollout is waiting on batch readiness, inspect `status.conditions`, the Deployment replica counts, and the current batch in the Rollout status before taking action. + +## Finalization + +Finalize restores: + +- `minReadySeconds` +- `progressDeadlineSeconds` +- `maxUnavailable` +- `maxSurge` + +If finalize fails, the controller reports `MinReadyDegraded` and keeps the annotations until the blocking issue is resolved. + +## Common log patterns + +- `MinReadyControl.Initialize` failures usually point to feature gate, PDB, or annotation problems. +- `MinReadyControl.UpgradeBatch[...]` failures usually point to drift or stale workload state. +- `MinReadyControl.Finalize` failures usually point to missing annotations or malformed annotation values. + +## Monitoring suggestions + +- Alert when `MinReadyDegraded` stays true for more than one reconcile window. 
+- Alert when `rollout_minready_degraded_total` increases for the same rollout. +- Track `rollout_minready_batch_duration_seconds` for batch completion latency. +- Track `rollout_minready_stuck_seconds` to spot batches that are still waiting on readiness. diff --git a/pkg/controller/batchrelease/batchrelease_controller_test.go b/pkg/controller/batchrelease/batchrelease_controller_test.go index a29428a9..e2016793 100644 --- a/pkg/controller/batchrelease/batchrelease_controller_test.go +++ b/pkg/controller/batchrelease/batchrelease_controller_test.go @@ -21,6 +21,7 @@ import ( "encoding/json" "fmt" "strconv" + "strings" "testing" "time" @@ -29,6 +30,7 @@ import ( kruiseappsv1alpha1 "github.com/openkruise/kruise-api/apps/v1alpha1" apps "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + policyv1 "k8s.io/api/policy/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" @@ -44,7 +46,9 @@ import ( rolloutapi "github.com/openkruise/rollouts/api" "github.com/openkruise/rollouts/api/v1beta1" + "github.com/openkruise/rollouts/pkg/feature" "github.com/openkruise/rollouts/pkg/util" + utilfeature "github.com/openkruise/rollouts/pkg/util/feature" ) const TIME_LAYOUT = "2006-01-02 15:04:05" @@ -209,6 +213,7 @@ var ( func init() { scheme = runtime.NewScheme() apimachineryruntime.Must(apps.AddToScheme(scheme)) + apimachineryruntime.Must(policyv1.AddToScheme(scheme)) apimachineryruntime.Must(rolloutapi.AddToScheme(scheme)) apimachineryruntime.Must(kruiseappsv1alpha1.AddToScheme(scheme)) @@ -824,6 +829,166 @@ func TestReconcile_Deployment(t *testing.T) { } } +func TestExecutorRoutesMinReadyDeploymentStrategy(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=false") + release := releaseDeploy.DeepCopy() + release.Spec.ReleasePlan.RollingStyle = v1beta1.PartitionRollingStyle + release.Spec.ReleasePlan.DeploymentStrategy = v1beta1.DeploymentStrategyMinReadySeconds + 
release.Status.Phase = v1beta1.RolloutPhasePreparing + deployment := stableDeploy.DeepCopy() + rec := record.NewFakeRecorder(100) + cli := fake.NewClientBuilder().WithScheme(scheme). + WithObjects(release, deployment). + WithStatusSubresource(&v1beta1.BatchRelease{}). + Build() + + controller, err := NewReleasePlanExecutor(cli, rec).getReleaseController(release, release.Status.DeepCopy()) + if err != nil { + t.Fatalf("getReleaseController failed: %v", err) + } + err = controller.Initialize() + if err == nil || !strings.Contains(err.Error(), "feature gate is disabled") { + t.Fatalf("Initialize error = %v, want MinReady feature gate disabled", err) + } +} + +func TestMinReadyControlPlaneRecordsInitializedConditionAndEvent(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + release := minReadyRelease() + deployment := stableDeploy.DeepCopy() + deployment.ResourceVersion = "1" + rec := record.NewFakeRecorder(100) + cli := fake.NewClientBuilder().WithScheme(scheme). + WithObjects(release, deployment). + WithStatusSubresource(&v1beta1.BatchRelease{}). 
+ Build() + status := release.Status.DeepCopy() + controller, err := NewReleasePlanExecutor(cli, rec).getReleaseController(release, status) + if err != nil { + t.Fatalf("getReleaseController failed: %v", err) + } + + if err := controller.Initialize(); err != nil { + t.Fatalf("Initialize failed: %v", err) + } + + assertCondition(t, status, v1beta1.RolloutConditionMinReadyInitialized, corev1.ConditionTrue, "MinReadyInitialized") + assertCondition(t, status, v1beta1.RolloutConditionMinReadyDegraded, corev1.ConditionFalse, "MinReadyHealthy") + assertRecordedEvent(t, rec, "MinReadyInitialized") +} + +func TestMinReadyControlPlaneRecordsDegradedForPDB(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + release := minReadyRelease() + deployment := stableDeploy.DeepCopy() + deployment.ResourceVersion = "1" + deployment.Spec.Template.Labels = map[string]string{"app": "busybox"} + pdb := &policyv1.PodDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{Name: "sample-pdb", Namespace: deployment.Namespace}, + Spec: policyv1.PodDisruptionBudgetSpec{ + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "busybox"}}, + }, + } + rec := record.NewFakeRecorder(100) + cli := fake.NewClientBuilder().WithScheme(scheme). + WithObjects(release, deployment, pdb). + WithStatusSubresource(&v1beta1.BatchRelease{}). 
+ Build() + status := release.Status.DeepCopy() + controller, err := NewReleasePlanExecutor(cli, rec).getReleaseController(release, status) + if err != nil { + t.Fatalf("getReleaseController failed: %v", err) + } + + err = controller.Initialize() + if err == nil || !strings.Contains(err.Error(), "MinReadyDegradedPDBIncompatible") { + t.Fatalf("Initialize error = %v, want PDB degraded", err) + } + + assertCondition(t, status, v1beta1.RolloutConditionMinReadyDegraded, corev1.ConditionTrue, "MinReadyDegradedPDBIncompatible") + assertRecordedEvent(t, rec, "MinReadyDegradedPDBIncompatible") +} + +func BenchmarkRecreateReconcile(b *testing.B) { + release := releaseDeploy.DeepCopy() + deployment := stableDeploy.DeepCopy() + rec := record.NewFakeRecorder(100) + cli := fake.NewClientBuilder().WithScheme(scheme). + WithObjects(release, deployment). + WithStatusSubresource(&v1beta1.BatchRelease{}). + Build() + reconciler := &BatchReleaseReconciler{ + Client: cli, + recorder: rec, + Scheme: scheme, + executor: NewReleasePlanExecutor(cli, rec), + } + req := reconcile.Request{NamespacedName: client.ObjectKeyFromObject(release)} + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = reconciler.Reconcile(context.TODO(), req) + } +} + +func BenchmarkMinReadyReconcile(b *testing.B) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + release := minReadyRelease() + deployment := stableDeploy.DeepCopy() + deployment.ResourceVersion = "1" + rec := record.NewFakeRecorder(100) + cli := fake.NewClientBuilder().WithScheme(scheme). + WithObjects(release, deployment). + WithStatusSubresource(&v1beta1.BatchRelease{}). 
+ Build() + reconciler := &BatchReleaseReconciler{ + Client: cli, + recorder: rec, + Scheme: scheme, + executor: NewReleasePlanExecutor(cli, rec), + } + req := reconcile.Request{NamespacedName: client.ObjectKeyFromObject(release)} + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = reconciler.Reconcile(context.TODO(), req) + } +} + +func minReadyRelease() *v1beta1.BatchRelease { + release := releaseDeploy.DeepCopy() + release.Spec.ReleasePlan.RollingStyle = v1beta1.PartitionRollingStyle + release.Spec.ReleasePlan.DeploymentStrategy = v1beta1.DeploymentStrategyMinReadySeconds + release.Status.Phase = v1beta1.RolloutPhasePreparing + return release +} + +func assertCondition(t *testing.T, status *v1beta1.BatchReleaseStatus, condType v1beta1.RolloutConditionType, condStatus corev1.ConditionStatus, reason string) { + t.Helper() + for _, condition := range status.Conditions { + if condition.Type != condType { + continue + } + if condition.Status != condStatus || condition.Reason != reason { + t.Fatalf("condition %s = %s/%s, want %s/%s", condType, condition.Status, condition.Reason, condStatus, reason) + } + return + } + t.Fatalf("condition %s not found in %#v", condType, status.Conditions) +} + +func assertRecordedEvent(t *testing.T, rec *record.FakeRecorder, want string) { + t.Helper() + select { + case event := <-rec.Events: + if !strings.Contains(event, want) { + t.Fatalf("event = %q, want containing %q", event, want) + } + case <-time.After(time.Second): + t.Fatalf("event containing %q not recorded", want) + } +} + func containers(version string) []corev1.Container { return []corev1.Container{ { diff --git a/pkg/controller/batchrelease/batchrelease_executor.go b/pkg/controller/batchrelease/batchrelease_executor.go index ec74e27b..aa03d46b 100644 --- a/pkg/controller/batchrelease/batchrelease_executor.go +++ b/pkg/controller/batchrelease/batchrelease_executor.go @@ -244,6 +244,10 @@ func (r *Executor) getReleaseController(release *v1beta1.BatchRelease, newStatus 
return partitionstyle.NewControlPlane(cloneset.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil } if targetRef.APIVersion == apps.SchemeGroupVersion.String() && targetRef.Kind == reflect.TypeOf(apps.Deployment{}).Name() { + if release.Spec.ReleasePlan.DeploymentStrategy == v1beta1.DeploymentStrategyMinReadySeconds { + klog.InfoS("Using Deployment MinReadySeconds partition-style release controller for this batch release", "workload name", targetKey.Name, "namespace", targetKey.Namespace) + return partitionstyle.NewControlPlane(partitiondeployment.NewMinReadyController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil + } klog.InfoS("Using Deployment partition-style release controller for this batch release", "workload name", targetKey.Name, "namespace", targetKey.Namespace) return partitionstyle.NewControlPlane(partitiondeployment.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil } diff --git a/pkg/controller/batchrelease/control/partitionstyle/control_plane.go b/pkg/controller/batchrelease/control/partitionstyle/control_plane.go index ff82e2f4..d921cb22 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/control_plane.go +++ b/pkg/controller/batchrelease/control/partitionstyle/control_plane.go @@ -62,14 +62,17 @@ func NewControlPlane(f NewInterfaceFunc, cli client.Client, recorder record.Even func (rc *realBatchControlPlane) Initialize() error { controller, err := rc.BuildController() if err != nil { + rc.recordMinReadyDegraded("MinReadyInitializeFailed", err) return err } // claim workload under our control err = controller.Initialize(rc.release) if err != nil { + rc.recordMinReadyDegraded("MinReadyInitializeFailed", err) return err } + rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyInitialized, "MinReadyInitialized", "MinReadySeconds strategy initialized") // record revision and replicas workloadInfo := controller.GetWorkloadInfo() @@ -88,20 +91,24 @@ func (rc 
*realBatchControlPlane) Initialize() error { func (rc *realBatchControlPlane) UpgradeBatch() error { controller, err := rc.BuildController() if err != nil { + rc.recordMinReadyDegraded("MinReadyBatchingFailed", err) return err } if controller.GetWorkloadInfo().Replicas == 0 { + rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatching", "MinReadySeconds strategy has no replicas to upgrade") return nil } err = rc.countAndUpdateNoNeedUpdateReplicas() if err != nil { + rc.recordMinReadyDegraded("MinReadyBatchingFailed", err) return err } batchContext, err := controller.CalculateBatchContext(rc.release) if err != nil { + rc.recordMinReadyDegraded("MinReadyBatchingFailed", err) return err } klog.Infof("BatchRelease %v calculated context when upgrade batch: %s", @@ -109,19 +116,27 @@ func (rc *realBatchControlPlane) UpgradeBatch() error { err = controller.UpgradeBatch(batchContext) if err != nil { + rc.recordMinReadyDegraded("MinReadyBatchingFailed", err) return err } - return rc.patcher.PatchPodBatchLabel(batchContext) + if err := rc.patcher.PatchPodBatchLabel(batchContext); err != nil { + rc.recordMinReadyDegraded("MinReadyBatchingFailed", err) + return err + } + rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatching", "MinReadySeconds strategy advanced the current batch") + return nil } func (rc *realBatchControlPlane) EnsureBatchPodsReadyAndLabeled() error { controller, err := rc.BuildController() if err != nil { + rc.recordMinReadyDegraded("MinReadyBatchingFailed", err) return err } if controller.GetWorkloadInfo().Replicas == 0 { + rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatchReady", "MinReadySeconds strategy batch is ready") return nil } @@ -129,23 +144,38 @@ func (rc *realBatchControlPlane) EnsureBatchPodsReadyAndLabeled() error { // the target calculated should be consistent with UpgradeBatch. 
batchContext, err := controller.CalculateBatchContext(rc.release) if err != nil { + rc.recordMinReadyDegraded("MinReadyBatchingFailed", err) return err } klog.Infof("BatchRelease %v calculated context when check batch ready: %s", klog.KObj(rc.release), batchContext.Log()) - return batchContext.IsBatchReady() + if err := batchContext.IsBatchReady(); err != nil { + observeMinReadyBatchWait(rc.release, util.GetBatchReleaseCondition(*rc.newStatus, v1beta1.RolloutConditionMinReadyBatching)) + return err + } + rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatchReady", "MinReadySeconds strategy batch is ready") + return nil } func (rc *realBatchControlPlane) Finalize() error { controller, err := rc.BuildController() if err != nil { - return client.IgnoreNotFound(err) + if err := client.IgnoreNotFound(err); err != nil { + rc.recordMinReadyDegraded("MinReadyFinalizeFailed", err) + return err + } + return nil } // release workload control info and clean up resources if it needs - return controller.Finalize(rc.release) + if err := controller.Finalize(rc.release); err != nil { + rc.recordMinReadyDegraded("MinReadyFinalizeFailed", err) + return err + } + rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyFinalized, "MinReadyFinalized", "MinReadySeconds strategy finalized") + return nil } func (rc *realBatchControlPlane) SyncWorkloadInformation() (control.WorkloadEventType, *util.WorkloadInfo, error) { diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/control.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/control.go index 9f3ba508..d34a1d51 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/control.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/control.go @@ -55,6 +55,10 @@ func NewController(cli client.Client, key types.NamespacedName, _ schema.GroupVe } } +func NewMinReadyController(cli client.Client, key types.NamespacedName, gvk 
schema.GroupVersionKind) partitionstyle.Interface { + return &MinReadyControl{realController: NewController(cli, key, gvk).(*realController)} +} + func (rc *realController) GetWorkloadInfo() *util.WorkloadInfo { return rc.WorkloadInfo } diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_constants.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_constants.go new file mode 100644 index 00000000..41783338 --- /dev/null +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_constants.go @@ -0,0 +1,117 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package deployment + +import ( + "fmt" + "strconv" + "strings" + + "k8s.io/apimachinery/pkg/util/intstr" + + "github.com/openkruise/rollouts/api/v1beta1" +) + +const ( + AnnotationOriginalMinReadySeconds = "rollouts.kruise.io/original-min-ready-seconds" + AnnotationOriginalProgressDeadlineSeconds = "rollouts.kruise.io/original-progress-deadline-seconds" + AnnotationOriginalMaxUnavailable = "rollouts.kruise.io/original-max-unavailable" + AnnotationOriginalMaxSurge = "rollouts.kruise.io/original-max-surge" + + AnnotationValueKubernetesDefault = "__k8s_default__" + + InflatedMinReadySeconds int32 = v1beta1.MaxReadySeconds + InflatedProgressDeadlineSeconds int32 = v1beta1.MaxProgressSeconds + InflatedMaxSurgeInt int32 = 1 +) + +var AllOriginalAnnotations = []string{ + AnnotationOriginalMinReadySeconds, + AnnotationOriginalProgressDeadlineSeconds, + AnnotationOriginalMaxUnavailable, + AnnotationOriginalMaxSurge, +} + +func serializeOriginalInt32(value *int32) string { + if value == nil { + return AnnotationValueKubernetesDefault + } + return strconv.FormatInt(int64(*value), 10) +} + +func serializeOriginalIntOrString(value *intstr.IntOrString) string { + if value == nil { + return AnnotationValueKubernetesDefault + } + if value.Type == intstr.String { + return value.StrVal + } + return strconv.FormatInt(int64(value.IntVal), 10) +} + +func parseOriginalInt32(annotations map[string]string, key string) (*int32, error) { + raw, err := readOriginalAnnotation(annotations, key) + if err != nil || raw == AnnotationValueKubernetesDefault { + return nil, err + } + n, err := strconv.ParseInt(raw, 10, 32) + if err != nil { + return nil, fmt.Errorf("annotation %s malformed int32: %w", key, err) + } + v := int32(n) + return &v, nil +} + +func parseOriginalIntOrString(annotations map[string]string, key string) (*intstr.IntOrString, error) { + raw, err := readOriginalAnnotation(annotations, key) + if err != nil || raw == AnnotationValueKubernetesDefault { + return nil, err + } + 
if strings.HasSuffix(raw, "%") { + if _, err := strconv.Atoi(strings.TrimSuffix(raw, "%")); err != nil { + return nil, fmt.Errorf("annotation %s malformed percent: %w", key, err) + } + v := intstr.FromString(raw) + return &v, nil + } + n, err := strconv.Atoi(raw) + if err != nil { + return nil, fmt.Errorf("annotation %s malformed int: %w", key, err) + } + v := intstr.FromInt(n) + return &v, nil +} + +func readOriginalAnnotation(annotations map[string]string, key string) (string, error) { + raw, ok := annotations[key] + if !ok { + return "", fmt.Errorf("annotation %s missing", key) + } + if raw == "" { + return "", fmt.Errorf("annotation %s present but empty", key) + } + return raw, nil +} + +func hasAnyOriginalAnnotation(annotations map[string]string) bool { + for _, key := range AllOriginalAnnotations { + if _, ok := annotations[key]; ok { + return true + } + } + return false +} diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go new file mode 100644 index 00000000..c4a879e7 --- /dev/null +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go @@ -0,0 +1,297 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package deployment + +import ( + "context" + "fmt" + + apps "k8s.io/api/apps/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/openkruise/rollouts/api/v1beta1" + batchcontext "github.com/openkruise/rollouts/pkg/controller/batchrelease/context" + "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle" + "github.com/openkruise/rollouts/pkg/feature" + utilfeature "github.com/openkruise/rollouts/pkg/util/feature" +) + +type MinReadyControl struct { + *realController +} + +func (mc *MinReadyControl) BuildController() (partitionstyle.Interface, error) { + if mc.realController == nil { + return nil, fmt.Errorf("MinReadyControl.BuildController: realController is nil") + } + built, err := mc.realController.BuildController() + if err != nil { + return nil, err + } + return &MinReadyControl{realController: built.(*realController)}, nil +} + +func (mc *MinReadyControl) Initialize(_ *v1beta1.BatchRelease) error { + if err := mc.ensureInitializeAllowed(); err != nil { + return fmt.Errorf("MinReadyControl.Initialize: %w", err) + } + original := mc.object.DeepCopy() + modified := original.DeepCopy() + if err := writeOriginalAnnotations(original, modified); err != nil { + return fmt.Errorf("MinReadyControl.Initialize: %w", err) + } + if hasAnyOriginalAnnotation(original.Annotations) { + if err := ensureInflatedDeploymentStrategy(original); err != nil { + return fmt.Errorf("MinReadyControl.Initialize: %w", err) + } + } + inflateDeploymentStrategy(modified) + patch := client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{}) + return mc.client.Patch(context.TODO(), modified, patch) +} + +func (mc *MinReadyControl) UpgradeBatch(ctx *batchcontext.BatchContext) error { + if mc.object.Spec.Strategy.RollingUpdate == nil { + return fmt.Errorf("MinReadyControl.UpgradeBatch[%d]: rollingUpdate is nil", ctx.CurrentBatch) + } + if err := ensureInflatedDeploymentStrategy(mc.object); err 
!= nil { + return fmt.Errorf("MinReadyControl.UpgradeBatch[%d]: %w", ctx.CurrentBatch, err) + } + current, err := intstr.GetScaledValueFromIntOrPercent( + mc.object.Spec.Strategy.RollingUpdate.MaxUnavailable, int(ctx.Replicas), true) + if err != nil { + return fmt.Errorf("MinReadyControl.UpgradeBatch[%d]: %w", ctx.CurrentBatch, err) + } + target := ctx.DesiredUpdatedReplicas + if int32(current) > target { + return fmt.Errorf("MinReadyControl.UpgradeBatch[%d]: %s: maxUnavailable=%d exceeds target=%d", + ctx.CurrentBatch, EventDegradedDriftDetected, current, target) + } + if int32(current) >= target { + return nil + } + original := mc.object.DeepCopy() + modified := original.DeepCopy() + maxUnavailable := intstr.FromInt(int(target)) + modified.Spec.Strategy.RollingUpdate.MaxUnavailable = &maxUnavailable + patch := client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{}) + return mc.client.Patch(context.TODO(), modified, patch) +} + +func (mc *MinReadyControl) Finalize(_ *v1beta1.BatchRelease) error { + if mc.object == nil { + return nil + } + if !hasAnyOriginalAnnotation(mc.object.Annotations) { + return nil + } + original := mc.object.DeepCopy() + restored, err := parseOriginalDeploymentStrategy(original.Annotations) + if err != nil { + return fmt.Errorf("MinReadyControl.Finalize: %w", err) + } + modified := original.DeepCopy() + applyOriginalDeploymentStrategy(modified, restored) + for _, key := range AllOriginalAnnotations { + delete(modified.Annotations, key) + } + patch := client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{}) + return mc.client.Patch(context.TODO(), modified, patch) +} + +func (mc *MinReadyControl) CalculateBatchContext(release *v1beta1.BatchRelease) (*batchcontext.BatchContext, error) { + currentBatch := release.Status.CanaryStatus.CurrentBatch + desiredPartition := release.Spec.ReleasePlan.Batches[currentBatch].CanaryReplicas + desiredUpdatedReplicas, err := 
minReadyDesiredUpdatedReplicas(desiredPartition, mc.object) + if err != nil { + return nil, fmt.Errorf("MinReadyControl.CalculateBatchContext: %w", err) + } + return &batchcontext.BatchContext{ + RolloutID: release.Spec.ReleasePlan.RolloutID, + CurrentBatch: currentBatch, + UpdateRevision: release.Status.UpdateRevision, + Replicas: mc.Replicas, + UpdatedReplicas: mc.object.Status.UpdatedReplicas, + UpdatedReadyReplicas: mc.object.Status.ReadyReplicas, + PlannedUpdatedReplicas: desiredUpdatedReplicas, + DesiredUpdatedReplicas: desiredUpdatedReplicas, + DesiredPartition: desiredPartition, + FailureThreshold: release.Spec.ReleasePlan.FailureThreshold, + Pods: mc.pods, + }, nil +} + +func (mc *MinReadyControl) ensureInitializeAllowed() error { + if mc.realController == nil || mc.object == nil { + return fmt.Errorf("deployment is not loaded") + } + if !utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) { + return fmt.Errorf("%s feature gate is disabled", feature.MinReadySecondsStrategy) + } + covered, err := mc.hasPDBCoveringDeployment() + if err != nil { + return err + } + if covered { + return fmt.Errorf("%s: PDB detected", EventDegradedPDBIncompatible) + } + return nil +} + +func writeOriginalAnnotations(original, modified *apps.Deployment) error { + if modified.Annotations == nil { + modified.Annotations = map[string]string{} + } + if hasAnyOriginalAnnotation(original.Annotations) { + return ensureAllOriginalAnnotations(original.Annotations) + } + modified.Annotations[AnnotationOriginalMinReadySeconds] = serializeOriginalInt32(&original.Spec.MinReadySeconds) + modified.Annotations[AnnotationOriginalProgressDeadlineSeconds] = serializeOriginalInt32(original.Spec.ProgressDeadlineSeconds) + modified.Annotations[AnnotationOriginalMaxUnavailable] = serializeOriginalIntOrString(originalMaxUnavailable(original)) + modified.Annotations[AnnotationOriginalMaxSurge] = serializeOriginalIntOrString(originalMaxSurge(original)) + return nil +} + +func 
ensureAllOriginalAnnotations(annotations map[string]string) error { + for _, key := range AllOriginalAnnotations { + if _, ok := annotations[key]; !ok { + return fmt.Errorf("annotation %s missing", key) + } + } + return nil +} + +func originalMaxUnavailable(deployment *apps.Deployment) *intstr.IntOrString { + if deployment.Spec.Strategy.RollingUpdate == nil { + return nil + } + return deployment.Spec.Strategy.RollingUpdate.MaxUnavailable +} + +func originalMaxSurge(deployment *apps.Deployment) *intstr.IntOrString { + if deployment.Spec.Strategy.RollingUpdate == nil { + return nil + } + return deployment.Spec.Strategy.RollingUpdate.MaxSurge +} + +func inflateDeploymentStrategy(deployment *apps.Deployment) { + progressDeadlineSeconds := InflatedProgressDeadlineSeconds + maxUnavailable := intstr.FromInt(0) + maxSurge := intstr.FromInt(int(InflatedMaxSurgeInt)) + deployment.Spec.MinReadySeconds = InflatedMinReadySeconds + deployment.Spec.ProgressDeadlineSeconds = &progressDeadlineSeconds + if deployment.Spec.Strategy.RollingUpdate == nil { + deployment.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{} + } + deployment.Spec.Strategy.RollingUpdate.MaxUnavailable = &maxUnavailable + deployment.Spec.Strategy.RollingUpdate.MaxSurge = &maxSurge +} + +func ensureInflatedDeploymentStrategy(deployment *apps.Deployment) error { + if deployment.Spec.MinReadySeconds != InflatedMinReadySeconds { + return fmt.Errorf("%s: minReadySeconds=%d want %d", + EventDegradedDriftDetected, deployment.Spec.MinReadySeconds, InflatedMinReadySeconds) + } + if deployment.Spec.ProgressDeadlineSeconds == nil || *deployment.Spec.ProgressDeadlineSeconds != InflatedProgressDeadlineSeconds { + return fmt.Errorf("%s: progressDeadlineSeconds=%v want %d", + EventDegradedDriftDetected, deployment.Spec.ProgressDeadlineSeconds, InflatedProgressDeadlineSeconds) + } + if deployment.Spec.Strategy.RollingUpdate == nil { + return fmt.Errorf("%s: rollingUpdate is nil", EventDegradedDriftDetected) + } + 
if maxSurge := deployment.Spec.Strategy.RollingUpdate.MaxSurge; maxSurge == nil || maxSurge.Type != intstr.Int || maxSurge.IntVal != InflatedMaxSurgeInt { + return fmt.Errorf("%s: maxSurge=%v want %d", EventDegradedDriftDetected, maxSurge, InflatedMaxSurgeInt) + } + return nil +} + +func minReadyDesiredUpdatedReplicas(desired intstr.IntOrString, deployment *apps.Deployment) (int32, error) { + if deployment.Spec.Replicas == nil { + return 0, fmt.Errorf("deployment replicas is nil") + } + replicas := int(*deployment.Spec.Replicas) + target, err := intstr.GetScaledValueFromIntOrPercent(&desired, replicas, true) + if err != nil { + return 0, err + } + if target < 0 { + return 0, nil + } + if target > replicas { + return int32(replicas), nil + } + return int32(target), nil +} + +type originalDeploymentStrategy struct { + minReadySeconds *int32 + progressDeadlineSeconds *int32 + maxUnavailable *intstr.IntOrString + maxSurge *intstr.IntOrString +} + +func parseOriginalDeploymentStrategy(annotations map[string]string) (*originalDeploymentStrategy, error) { + if err := ensureAllOriginalAnnotations(annotations); err != nil { + return nil, err + } + minReadySeconds, err := parseOriginalInt32(annotations, AnnotationOriginalMinReadySeconds) + if err != nil { + return nil, err + } + progressDeadlineSeconds, err := parseOriginalInt32(annotations, AnnotationOriginalProgressDeadlineSeconds) + if err != nil { + return nil, err + } + maxUnavailable, err := parseOriginalIntOrString(annotations, AnnotationOriginalMaxUnavailable) + if err != nil { + return nil, err + } + maxSurge, err := parseOriginalIntOrString(annotations, AnnotationOriginalMaxSurge) + if err != nil { + return nil, err + } + return &originalDeploymentStrategy{ + minReadySeconds: minReadySeconds, + progressDeadlineSeconds: progressDeadlineSeconds, + maxUnavailable: maxUnavailable, + maxSurge: maxSurge, + }, nil +} + +func applyOriginalDeploymentStrategy(deployment *apps.Deployment, original *originalDeploymentStrategy) 
{ + deployment.Spec.MinReadySeconds = 0 + if original.minReadySeconds != nil { + deployment.Spec.MinReadySeconds = *original.minReadySeconds + } + deployment.Spec.ProgressDeadlineSeconds = original.progressDeadlineSeconds + if original.maxUnavailable == nil && original.maxSurge == nil { + deployment.Spec.Strategy.RollingUpdate = nil + return + } + if deployment.Spec.Strategy.RollingUpdate == nil { + deployment.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{} + } + deployment.Spec.Strategy.RollingUpdate.MaxUnavailable = original.maxUnavailable + deployment.Spec.Strategy.RollingUpdate.MaxSurge = original.maxSurge +} + +const EventDegradedPDBIncompatible = "MinReadyDegradedPDBIncompatible" +const EventDegradedDriftDetected = "MinReadyDegradedDriftDetected" + +var _ partitionstyle.Interface = (*MinReadyControl)(nil) diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go new file mode 100644 index 00000000..58e17b73 --- /dev/null +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go @@ -0,0 +1,263 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package deployment + +import ( + "strings" + "testing" + + apps "k8s.io/api/apps/v1" + policyv1 "k8s.io/api/policy/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/pointer" + + batchcontext "github.com/openkruise/rollouts/pkg/controller/batchrelease/context" + "github.com/openkruise/rollouts/pkg/feature" + utilfeature "github.com/openkruise/rollouts/pkg/util/feature" +) + +func init() { + _ = policyv1.AddToScheme(scheme) +} + +func TestMinReadyInitializeWritesOriginalAnnotationsAndInflatesFields(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newMinReadyDeployment() + control := newBuiltMinReadyControl(t, deployment) + + if err := control.Initialize(releaseDemo.DeepCopy()); err != nil { + t.Fatalf("Initialize failed: %v", err) + } + + got := fetchMinReadyDeployment(t, control) + assertMinReadyInflated(t, got) + annotations := got.GetAnnotations() + assertAnnotation(t, annotations, AnnotationOriginalMinReadySeconds, "7") + assertAnnotation(t, annotations, AnnotationOriginalProgressDeadlineSeconds, "60") + assertAnnotation(t, annotations, AnnotationOriginalMaxUnavailable, "25%") + assertAnnotation(t, annotations, AnnotationOriginalMaxSurge, "1") +} + +func TestMinReadyInitializeIsIdempotentAndDoesNotOverwriteAnnotations(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newMinReadyDeployment() + deployment.Annotations = map[string]string{ + AnnotationOriginalMinReadySeconds: "5", + AnnotationOriginalProgressDeadlineSeconds: "30", + AnnotationOriginalMaxUnavailable: "10%", + AnnotationOriginalMaxSurge: "2", + } + inflateDeploymentStrategy(deployment) + control := newBuiltMinReadyControl(t, deployment) + + if err := control.Initialize(releaseDemo.DeepCopy()); err != nil { + t.Fatalf("Initialize failed: %v", err) + } + + got := fetchMinReadyDeployment(t, control) + assertAnnotation(t, 
// TestMinReadyInitializeRejectsGitOpsDrift verifies that Initialize returns an
// error mentioning EventDegradedDriftDetected when all original-value
// annotations are already present but the deployment spec has not been
// inflated.
func TestMinReadyInitializeRejectsGitOpsDrift(t *testing.T) {
	_ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true")
	deployment := newMinReadyDeployment()
	// Annotations are set, but inflateDeploymentStrategy is deliberately not
	// called, so the spec still carries the non-inflated values.
	deployment.Annotations = map[string]string{
		AnnotationOriginalMinReadySeconds:         "5",
		AnnotationOriginalProgressDeadlineSeconds: "30",
		AnnotationOriginalMaxUnavailable:          "10%",
		AnnotationOriginalMaxSurge:                "2",
	}
	control := newBuiltMinReadyControl(t, deployment)

	err := control.Initialize(releaseDemo.DeepCopy())
	if err == nil || !strings.Contains(err.Error(), EventDegradedDriftDetected) {
		t.Fatalf("Initialize error = %v, want drift detected", err)
	}
}
control.Initialize(releaseDemo.DeepCopy()); err != nil { + t.Fatalf("Initialize failed: %v", err) + } + + got := fetchMinReadyDeployment(t, control) + assertAnnotation(t, got.Annotations, AnnotationOriginalProgressDeadlineSeconds, AnnotationValueKubernetesDefault) + assertAnnotation(t, got.Annotations, AnnotationOriginalMaxUnavailable, AnnotationValueKubernetesDefault) + assertAnnotation(t, got.Annotations, AnnotationOriginalMaxSurge, AnnotationValueKubernetesDefault) + assertMinReadyInflated(t, got) +} + +func TestMinReadyInitializeRejectsFeatureGateDisabled(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=false") + control := newBuiltMinReadyControl(t, newMinReadyDeployment()) + + err := control.Initialize(releaseDemo.DeepCopy()) + if err == nil || !strings.Contains(err.Error(), "feature gate is disabled") { + t.Fatalf("Initialize error = %v, want feature gate disabled", err) + } +} + +func TestMinReadyInitializeRejectsCoveringPDB(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newMinReadyDeployment() + pdb := &policyv1.PodDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{Name: "demo-pdb", Namespace: deployment.Namespace}, + Spec: policyv1.PodDisruptionBudgetSpec{ + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "busybox"}}, + }, + } + control := newBuiltMinReadyControl(t, deployment, pdb) + + err := control.Initialize(releaseDemo.DeepCopy()) + if err == nil || !strings.Contains(err.Error(), EventDegradedPDBIncompatible) { + t.Fatalf("Initialize error = %v, want PDB incompatible", err) + } +} + +func TestMinReadyUpgradeBatchUpdatesMaxUnavailableOnly(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newMinReadyDeployment() + control := newBuiltMinReadyControl(t, deployment) + if err := 
// TestMinReadyCalculateBatchContextUsesReadyReplicas builds a batch context for
// batch index 1 from a deployment reporting 5 updated and 5 ready replicas, and
// checks that the context reports 5 desired, 5 planned and 5 updated-ready
// replicas and is considered ready.
func TestMinReadyCalculateBatchContextUsesReadyReplicas(t *testing.T) {
	release := releaseDemo.DeepCopy()
	release.Status.CanaryStatus.CurrentBatch = 1
	release.Status.UpdateRevision = "version-2"
	deployment := newMinReadyDeployment()
	deployment.Status.Replicas = 10
	deployment.Status.UpdatedReplicas = 5
	deployment.Status.ReadyReplicas = 5
	control := newBuiltMinReadyControl(t, deployment)

	ctx, err := control.CalculateBatchContext(release)
	if err != nil {
		t.Fatalf("CalculateBatchContext failed: %v", err)
	}

	if ctx.DesiredUpdatedReplicas != 5 || ctx.PlannedUpdatedReplicas != 5 {
		t.Fatalf("desired/planned = %d/%d, want 5/5", ctx.DesiredUpdatedReplicas, ctx.PlannedUpdatedReplicas)
	}
	// The expected value mirrors deployment.Status.ReadyReplicas set above.
	if ctx.UpdatedReadyReplicas != 5 {
		t.Fatalf("UpdatedReadyReplicas = %d, want ReadyReplicas 5", ctx.UpdatedReadyReplicas)
	}
	if err := ctx.IsBatchReady(); err != nil {
		t.Fatalf("IsBatchReady failed: %v", err)
	}
}
// TestMinReadyCalculateBatchContextReplicasZero checks that a deployment scaled
// to zero replicas yields a batch context with zero desired updated replicas
// and no error.
func TestMinReadyCalculateBatchContextReplicasZero(t *testing.T) {
	release := releaseDemo.DeepCopy()
	release.Status.CanaryStatus.CurrentBatch = 1
	deployment := newMinReadyDeployment()
	deployment.Spec.Replicas = pointer.Int32(0)
	deployment.Status.Replicas = 0
	control := newBuiltMinReadyControl(t, deployment)

	ctx, err := control.CalculateBatchContext(release)
	if err != nil {
		t.Fatalf("CalculateBatchContext failed: %v", err)
	}
	if ctx.DesiredUpdatedReplicas != 0 {
		t.Fatalf("DesiredUpdatedReplicas = %d, want 0", ctx.DesiredUpdatedReplicas)
	}
}
b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_finalize_test.go @@ -0,0 +1,128 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package deployment + +import ( + "strings" + "testing" +) + +func TestMinReadyFinalizeRestoresOriginalValues(t *testing.T) { + deployment := newInflatedMinReadyDeployment() + deployment.Annotations = map[string]string{ + AnnotationOriginalMinReadySeconds: "7", + AnnotationOriginalProgressDeadlineSeconds: "60", + AnnotationOriginalMaxUnavailable: "25%", + AnnotationOriginalMaxSurge: "1", + } + control := newBuiltMinReadyControl(t, deployment) + + if err := control.Finalize(releaseDemo.DeepCopy()); err != nil { + t.Fatalf("Finalize failed: %v", err) + } + + got := fetchMinReadyDeployment(t, control) + if got.Spec.MinReadySeconds != 7 { + t.Fatalf("minReadySeconds = %d, want 7", got.Spec.MinReadySeconds) + } + if got.Spec.ProgressDeadlineSeconds == nil || *got.Spec.ProgressDeadlineSeconds != 60 { + t.Fatalf("progressDeadlineSeconds = %v, want 60", got.Spec.ProgressDeadlineSeconds) + } + if unavailable := got.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.StrVal != "25%" { + t.Fatalf("maxUnavailable = %v, want 25%%", unavailable) + } + if surge := got.Spec.Strategy.RollingUpdate.MaxSurge; surge == nil || surge.IntVal != 1 { + t.Fatalf("maxSurge = %v, want 1", surge) + } + for _, key := range AllOriginalAnnotations { + if _, ok := got.Annotations[key]; ok { + 
t.Fatalf("annotation %s still exists", key) + } + } +} + +func TestMinReadyFinalizeRestoresKubernetesDefaults(t *testing.T) { + deployment := newInflatedMinReadyDeployment() + deployment.Annotations = map[string]string{ + AnnotationOriginalMinReadySeconds: "0", + AnnotationOriginalProgressDeadlineSeconds: AnnotationValueKubernetesDefault, + AnnotationOriginalMaxUnavailable: AnnotationValueKubernetesDefault, + AnnotationOriginalMaxSurge: AnnotationValueKubernetesDefault, + } + control := newBuiltMinReadyControl(t, deployment) + + if err := control.Finalize(releaseDemo.DeepCopy()); err != nil { + t.Fatalf("Finalize failed: %v", err) + } + + got := fetchMinReadyDeployment(t, control) + if got.Spec.MinReadySeconds != 0 { + t.Fatalf("minReadySeconds = %d, want 0", got.Spec.MinReadySeconds) + } + if got.Spec.ProgressDeadlineSeconds != nil { + t.Fatalf("progressDeadlineSeconds = %v, want nil", got.Spec.ProgressDeadlineSeconds) + } + if got.Spec.Strategy.RollingUpdate != nil { + t.Fatalf("rollingUpdate = %v, want nil", got.Spec.Strategy.RollingUpdate) + } +} + +func TestMinReadyFinalizeNoopWhenAnnotationsAbsent(t *testing.T) { + deployment := newInflatedMinReadyDeployment() + deployment.Annotations = nil + control := newBuiltMinReadyControl(t, deployment) + + if err := control.Finalize(releaseDemo.DeepCopy()); err != nil { + t.Fatalf("Finalize failed: %v", err) + } + + got := fetchMinReadyDeployment(t, control) + assertMinReadyInflated(t, got) +} + +func TestMinReadyFinalizeRejectsPartialAnnotations(t *testing.T) { + deployment := newInflatedMinReadyDeployment() + deployment.Annotations = map[string]string{ + AnnotationOriginalMinReadySeconds: "7", + } + control := newBuiltMinReadyControl(t, deployment) + + err := control.Finalize(releaseDemo.DeepCopy()) + if err == nil || !strings.Contains(err.Error(), AnnotationOriginalProgressDeadlineSeconds) { + t.Fatalf("Finalize error = %v, want missing annotation error", err) + } + got := fetchMinReadyDeployment(t, control) + 
// TestMinReadyFinalizeRejectsMalformedAnnotations verifies that Finalize
// returns a "malformed int32" error when an original-value annotation holds a
// non-numeric value, and that the stored deployment is still inflated
// afterwards.
func TestMinReadyFinalizeRejectsMalformedAnnotations(t *testing.T) {
	deployment := newInflatedMinReadyDeployment()
	deployment.Annotations = map[string]string{
		AnnotationOriginalMinReadySeconds: "7",
		// "bad" is the deliberately unparsable value under test.
		AnnotationOriginalProgressDeadlineSeconds: "bad",
		AnnotationOriginalMaxUnavailable:          "25%",
		AnnotationOriginalMaxSurge:                "1",
	}
	control := newBuiltMinReadyControl(t, deployment)

	err := control.Finalize(releaseDemo.DeepCopy())
	if err == nil || !strings.Contains(err.Error(), "malformed int32") {
		t.Fatalf("Finalize error = %v, want malformed int32 error", err)
	}
	// Re-read the object from the fake client to confirm nothing was restored.
	got := fetchMinReadyDeployment(t, control)
	assertMinReadyInflated(t, got)
}
+*/ + +package deployment + +import ( + "context" + "fmt" + + policyv1 "k8s.io/api/policy/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +func (mc *MinReadyControl) hasPDBCoveringDeployment() (bool, error) { + pdbList := &policyv1.PodDisruptionBudgetList{} + if err := mc.client.List(context.TODO(), pdbList, client.InNamespace(mc.object.Namespace)); err != nil { + return false, fmt.Errorf("%s: list PDBs: %w", EventDegradedPDBIncompatible, err) + } + templateLabels := labels.Set(mc.object.Spec.Template.Labels) + for i := range pdbList.Items { + covered, err := pdbCoversLabels(&pdbList.Items[i], templateLabels) + if err != nil || covered { + return covered, err + } + } + return false, nil +} + +func pdbCoversLabels(pdb *policyv1.PodDisruptionBudget, templateLabels labels.Set) (bool, error) { + selector, err := metav1.LabelSelectorAsSelector(pdb.Spec.Selector) + if err != nil { + return false, fmt.Errorf("%s: invalid PDB selector %s/%s: %w", + EventDegradedPDBIncompatible, pdb.Namespace, pdb.Name, err) + } + return selector.Matches(templateLabels), nil +} diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_test_helpers_test.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_test_helpers_test.go new file mode 100644 index 00000000..d9ccd2ee --- /dev/null +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_test_helpers_test.go @@ -0,0 +1,115 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package deployment + +import ( + "context" + "testing" + + apps "k8s.io/api/apps/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +func newMinReadyDeployment() *apps.Deployment { + progressDeadline := int32(60) + maxUnavailable := intstr.FromString("25%") + maxSurge := intstr.FromInt(1) + deployment := deploymentDemo.DeepCopy() + deployment.ResourceVersion = "1" + deployment.Spec.MinReadySeconds = 7 + deployment.Spec.ProgressDeadlineSeconds = &progressDeadline + deployment.Spec.Strategy.Type = apps.RollingUpdateDeploymentStrategyType + deployment.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{ + MaxUnavailable: &maxUnavailable, + MaxSurge: &maxSurge, + } + return deployment +} + +func newInflatedMinReadyDeployment() *apps.Deployment { + deployment := newMinReadyDeployment() + inflateDeploymentStrategy(deployment) + return deployment +} + +func newBuiltMinReadyControl(t *testing.T, deployment *apps.Deployment, objs ...interface{}) *MinReadyControl { + t.Helper() + objects := []interface{}{deployment} + objects = append(objects, objs...) + builder := fake.NewClientBuilder().WithScheme(scheme).WithObjects(toClientObjects(t, objects)...) 
+ rc := NewController(builder.Build(), types.NamespacedName{ + Namespace: deployment.Namespace, + Name: deployment.Name, + }, deployment.GroupVersionKind()) + built, err := (&MinReadyControl{realController: rc.(*realController)}).BuildController() + if err != nil { + t.Fatalf("BuildController failed: %v", err) + } + return built.(*MinReadyControl) +} + +func toClientObjects(t *testing.T, objects []interface{}) []client.Object { + t.Helper() + result := make([]client.Object, 0, len(objects)) + for _, object := range objects { + typed, ok := object.(client.Object) + if !ok { + t.Fatalf("object %T does not implement client.Object", object) + } + result = append(result, typed) + } + return result +} + +func fetchMinReadyDeployment(t *testing.T, control *MinReadyControl) *apps.Deployment { + t.Helper() + got := &apps.Deployment{} + key := types.NamespacedName{Namespace: control.object.Namespace, Name: control.object.Name} + if err := control.client.Get(context.TODO(), key, got); err != nil { + t.Fatalf("Get deployment failed: %v", err) + } + return got +} + +func assertMinReadyInflated(t *testing.T, deployment *apps.Deployment) { + t.Helper() + if deployment.Spec.Strategy.Type != apps.RollingUpdateDeploymentStrategyType { + t.Fatalf("strategy.type = %q, want RollingUpdate", deployment.Spec.Strategy.Type) + } + if deployment.Spec.MinReadySeconds != InflatedMinReadySeconds { + t.Fatalf("minReadySeconds = %d, want %d", deployment.Spec.MinReadySeconds, InflatedMinReadySeconds) + } + if deployment.Spec.ProgressDeadlineSeconds == nil || *deployment.Spec.ProgressDeadlineSeconds != InflatedProgressDeadlineSeconds { + t.Fatalf("progressDeadlineSeconds = %v, want %d", deployment.Spec.ProgressDeadlineSeconds, InflatedProgressDeadlineSeconds) + } + if got := deployment.Spec.Strategy.RollingUpdate.MaxUnavailable; got == nil || got.IntVal != 0 { + t.Fatalf("maxUnavailable = %v, want 0", got) + } + if got := deployment.Spec.Strategy.RollingUpdate.MaxSurge; got == nil || got.IntVal != 
// assertAnnotation fails the test unless annotations contains key with exactly
// the value want.
//
// A missing key is reported separately from a mismatched value, so an absent
// annotation can never satisfy an expectation of the empty string.
func assertAnnotation(t *testing.T, annotations map[string]string, key, want string) {
	t.Helper()
	got, ok := annotations[key]
	if !ok {
		t.Fatalf("annotation %s is missing, want %q", key, want)
	}
	if got != want {
		t.Fatalf("annotation %s = %q, want %q", key, got, want)
	}
}
// recordMinReadyNormal records a healthy MinReady state on the release: it sets
// the condition condType to True with the given reason and message, marks the
// degraded condition as false, clears the status message, updates metrics and
// emits a Normal event. It does nothing for releases that do not use the
// MinReadySeconds deployment strategy.
func (rc *realBatchControlPlane) recordMinReadyNormal(condType v1beta1.RolloutConditionType, reason, message string) {
	if !rc.isMinReadyRelease() {
		return
	}
	// Read the existing condition before it is replaced below; its
	// LastTransitionTime is what the batch-duration observation is based on.
	previousCondition := util.GetBatchReleaseCondition(*rc.newStatus, condType)
	condition := util.NewRolloutCondition(condType, v1.ConditionTrue, reason, message)
	util.SetBatchReleaseCondition(rc.newStatus, *condition)
	clearMinReadyDegraded(rc.newStatus)
	rc.newStatus.Message = ""
	if reason == "MinReadyBatchReady" {
		// NOTE(review): this runs on every call made with this reason. If the
		// caller invokes it on each reconcile of an already-ready batch, the
		// histogram and success counter would be recorded repeatedly — confirm
		// against the call site.
		observeMinReadyBatchDuration(rc.release, previousCondition)
		brmetrics.RecordMinReadyBatch(rc.release, brmetrics.BatchResultSuccess)
	}
	// Any healthy report resets the batch-ready-timeout stuck gauge to zero.
	brmetrics.ClearMinReadyStuckSeconds(rc.release, brmetrics.StuckReasonBatchReadyTimeout)
	rc.Event(rc.release, v1.EventTypeNormal, reason, message)
}
// minReadyDegradedEventReason picks the event reason for a degraded MinReady
// release by looking for known markers in message. The checks run in a fixed
// order and the first match wins; fallback is returned when nothing matches.
func minReadyDegradedEventReason(fallback, message string) string {
	has := func(marker string) bool { return strings.Contains(message, marker) }

	if has("feature gate is disabled") {
		return "MinReadyFeatureGateDisabled"
	}
	if has("MinReadyDegradedPDBIncompatible") {
		return "MinReadyDegradedPDBIncompatible"
	}
	// Missing and malformed annotations share one reason.
	if has("annotation ") && (has("missing") || has("malformed")) {
		return "MinReadyDegradedMissingAnnotations"
	}
	if has("MinReadyDegradedDriftDetected") {
		return "MinReadyDegradedDriftDetected"
	}
	return fallback
}
// TestObserveMinReadyBatchWaitSetsStuckGauge checks that observeMinReadyBatchWait
// publishes a positive value on the rollout_minready_stuck_seconds gauge,
// labelled with the release name, namespace and the batch_ready_timeout
// reason, when the condition's last transition lies in the past.
func TestObserveMinReadyBatchWaitSetsStuckGauge(t *testing.T) {
	release := &v1beta1.BatchRelease{
		ObjectMeta: metav1.ObjectMeta{Name: "stuck-rollout", Namespace: "default"},
	}
	// Back-date the transition so the measured wait is strictly positive.
	startedAt := metav1.NewTime(time.Now().Add(-4 * time.Second))
	condition := &v1beta1.RolloutCondition{
		Type:               v1beta1.RolloutConditionMinReadyBatching,
		Status:             v1.ConditionTrue,
		Reason:             "MinReadyBatching",
		Message:            "MinReadySeconds strategy advanced the current batch",
		LastTransitionTime: startedAt,
		LastUpdateTime:     startedAt,
	}

	observeMinReadyBatchWait(release, condition)

	gauge := findGaugeMetric(t, "rollout_minready_stuck_seconds", map[string]string{
		"rollout":   release.Name,
		"namespace": release.Namespace,
		"reason":    "batch_ready_timeout",
	})
	if gauge.GetValue() <= 0 {
		t.Fatalf("gauge value = %v, want > 0", gauge.GetValue())
	}
}
+ return metric.GetGauge() + } + } + } + t.Fatalf("gauge %s with labels %v not found", name, labels) + return nil +} + +func metricLabelsMatch(metric *dto.Metric, labels map[string]string) bool { + for key, want := range labels { + matched := false + for _, pair := range metric.GetLabel() { + if pair.GetName() == key && pair.GetValue() == want { + matched = true + break + } + } + if !matched { + return false + } + } + return true +} diff --git a/pkg/controller/batchrelease/metrics/minready_metrics.go b/pkg/controller/batchrelease/metrics/minready_metrics.go new file mode 100644 index 00000000..f90ef6c3 --- /dev/null +++ b/pkg/controller/batchrelease/metrics/minready_metrics.go @@ -0,0 +1,115 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package metrics + +import ( + "time" + + "github.com/prometheus/client_golang/prometheus" + ctrlmetrics "sigs.k8s.io/controller-runtime/pkg/metrics" + + "github.com/openkruise/rollouts/api/v1beta1" +) + +const ( + BatchResultSuccess = "success" + BatchResultStuck = "stuck" + BatchResultDegraded = "degraded" + + DegradedReasonControllerError = "controller_error" + DegradedReasonFeatureGateDisabled = "feature_gate_disabled" + DegradedReasonGitOpsDrift = "gitops_drift" + DegradedReasonMissingAnnotations = "missing_annotations" + DegradedReasonPDBIncompatible = "pdb_incompatible" + StuckReasonBatchReadyTimeout = "batch_ready_timeout" +) + +var ( + minReadyBatchesTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "rollout_minready_batches_total", + Help: "Total number of MinReadySeconds rollout batches by result.", + }, + []string{"rollout", "namespace", "result"}, + ) + minReadyBatchDurationSeconds = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "rollout_minready_batch_duration_seconds", + Help: "Duration in seconds from MinReadySeconds batch upgrade to readiness.", + Buckets: []float64{5, 15, 30, 60, 180, 600, 1800}, + }, + []string{"rollout", "namespace"}, + ) + minReadyStuckSeconds = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "rollout_minready_stuck_seconds", + Help: "Current MinReadySeconds stuck duration in seconds by reason.", + }, + []string{"rollout", "namespace", "reason"}, + ) + minReadyDegradedTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "rollout_minready_degraded_total", + Help: "Total number of MinReadySeconds degraded transitions by reason.", + }, + []string{"rollout", "namespace", "reason"}, + ) +) + +func init() { + ctrlmetrics.Registry.MustRegister( + minReadyBatchesTotal, + minReadyBatchDurationSeconds, + minReadyStuckSeconds, + minReadyDegradedTotal, + ) +} + +func RecordMinReadyBatch(release *v1beta1.BatchRelease, result string) { + if release == nil { + return + } + 
minReadyBatchesTotal.WithLabelValues(release.Name, release.Namespace, result).Inc() +} + +func ObserveMinReadyBatchDuration(release *v1beta1.BatchRelease, duration time.Duration) { + if release == nil { + return + } + minReadyBatchDurationSeconds.WithLabelValues(release.Name, release.Namespace).Observe(duration.Seconds()) +} + +func SetMinReadyStuckSeconds(release *v1beta1.BatchRelease, reason string, seconds float64) { + if release == nil { + return + } + minReadyStuckSeconds.WithLabelValues(release.Name, release.Namespace, reason).Set(seconds) +} + +func ClearMinReadyStuckSeconds(release *v1beta1.BatchRelease, reason string) { + if release == nil { + return + } + minReadyStuckSeconds.WithLabelValues(release.Name, release.Namespace, reason).Set(0) +} + +func RecordMinReadyDegraded(release *v1beta1.BatchRelease, reason string) { + if release == nil { + return + } + minReadyDegradedTotal.WithLabelValues(release.Name, release.Namespace, reason).Inc() +} diff --git a/pkg/controller/batchrelease/metrics/minready_metrics_test.go b/pkg/controller/batchrelease/metrics/minready_metrics_test.go new file mode 100644 index 00000000..4e09f6fb --- /dev/null +++ b/pkg/controller/batchrelease/metrics/minready_metrics_test.go @@ -0,0 +1,85 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package metrics + +import ( + "testing" + "time" + + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/openkruise/rollouts/api/v1beta1" +) + +func TestMinReadyMetricsRecorders(t *testing.T) { + release := &v1beta1.BatchRelease{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rollout-a", + Namespace: "default", + }, + } + + RecordMinReadyBatch(release, BatchResultSuccess) + ObserveMinReadyBatchDuration(release, 2*time.Second) + SetMinReadyStuckSeconds(release, StuckReasonBatchReadyTimeout, 3) + ClearMinReadyStuckSeconds(release, StuckReasonBatchReadyTimeout) + RecordMinReadyDegraded(release, DegradedReasonPDBIncompatible) + + assertCounterPositive(t, minReadyBatchesTotal.WithLabelValues("rollout-a", "default", BatchResultSuccess)) + histogram, ok := minReadyBatchDurationSeconds.WithLabelValues("rollout-a", "default").(prometheus.Metric) + if !ok { + t.Fatalf("histogram observer does not implement prometheus.Metric") + } + assertHistogramCountPositive(t, histogram) + assertGaugeValue(t, minReadyStuckSeconds.WithLabelValues("rollout-a", "default", StuckReasonBatchReadyTimeout), 0) + assertCounterPositive(t, minReadyDegradedTotal.WithLabelValues("rollout-a", "default", DegradedReasonPDBIncompatible)) +} + +func assertCounterPositive(t *testing.T, metric interface{ Write(*dto.Metric) error }) { + t.Helper() + var got dto.Metric + if err := metric.Write(&got); err != nil { + t.Fatalf("write metric failed: %v", err) + } + if got.Counter == nil || got.Counter.GetValue() <= 0 { + t.Fatalf("counter = %v, want positive", got.Counter) + } +} + +func assertGaugeValue(t *testing.T, metric interface{ Write(*dto.Metric) error }, want float64) { + t.Helper() + var got dto.Metric + if err := metric.Write(&got); err != nil { + t.Fatalf("write metric failed: %v", err) + } + if got.Gauge == nil || got.Gauge.GetValue() != want { + t.Fatalf("gauge = %v, want %v", got.Gauge, want) 
+ } +} + +func assertHistogramCountPositive(t *testing.T, metric interface{ Write(*dto.Metric) error }) { + t.Helper() + var got dto.Metric + if err := metric.Write(&got); err != nil { + t.Fatalf("write metric failed: %v", err) + } + if got.Histogram == nil || got.Histogram.GetSampleCount() == 0 { + t.Fatalf("histogram = %v, want sample count > 0", got.Histogram) + } +} diff --git a/pkg/controller/rollout/rollout_canary.go b/pkg/controller/rollout/rollout_canary.go index 0cecbac2..9969737b 100644 --- a/pkg/controller/rollout/rollout_canary.go +++ b/pkg/controller/rollout/rollout_canary.go @@ -411,6 +411,7 @@ func (m *canaryReleaseManager) createBatchRelease(rollout *v1beta1.Rollout, roll FailureThreshold: rollout.Spec.Strategy.Canary.FailureThreshold, PatchPodTemplateMetadata: rollout.Spec.Strategy.Canary.PatchPodTemplateMetadata, RollingStyle: rollout.Spec.Strategy.GetRollingStyle(), + DeploymentStrategy: rollout.Spec.Strategy.Canary.DeploymentStrategy, EnableExtraWorkloadForCanary: rollout.Spec.Strategy.Canary.EnableExtraWorkloadForCanary, }, }, diff --git a/pkg/controller/rollout/rollout_releaseManager_test.go b/pkg/controller/rollout/rollout_releaseManager_test.go index 6f8e1343..69d118ec 100644 --- a/pkg/controller/rollout/rollout_releaseManager_test.go +++ b/pkg/controller/rollout/rollout_releaseManager_test.go @@ -342,6 +342,26 @@ func (m *mockReleaseManager) createBatchRelease(rollout *v1beta1.Rollout, rollou return br } +func TestCanaryReleaseManagerPassesDeploymentStrategyToBatchRelease(t *testing.T) { + rollout := &v1beta1.Rollout{ + ObjectMeta: metav1.ObjectMeta{Name: "my-rollout", Namespace: "default"}, + Spec: v1beta1.RolloutSpec{ + WorkloadRef: v1beta1.ObjectRef{APIVersion: "apps/v1", Kind: "Deployment", Name: "my-app"}, + Strategy: v1beta1.RolloutStrategy{ + Canary: &v1beta1.CanaryStrategy{ + DeploymentStrategy: v1beta1.DeploymentStrategyMinReadySeconds, + Steps: []v1beta1.CanaryStep{ + {Replicas: &intstr.IntOrString{Type: intstr.String, StrVal: 
"10%"}}, + }, + }, + }, + }, + } + + br := (&canaryReleaseManager{}).createBatchRelease(rollout, "rollout-id", 0, false) + assert.Equal(t, v1beta1.DeploymentStrategyMinReadySeconds, br.Spec.ReleasePlan.DeploymentStrategy) +} + func TestRunBatchRelease(t *testing.T) { rollout := &v1beta1.Rollout{ ObjectMeta: metav1.ObjectMeta{Name: "my-rollout", Namespace: "default", UID: "rollout-uid-12345"}, diff --git a/pkg/feature/rollout_features.go b/pkg/feature/rollout_features.go index f8f48f16..fd3a8c10 100644 --- a/pkg/feature/rollout_features.go +++ b/pkg/feature/rollout_features.go @@ -33,6 +33,8 @@ const ( // If the rollout CR is deleted during the rollout process, `pause=false` and `partition=0` will be set, causing the workload to complete deployment. // If `KeepWorkloadPausedOnRolloutDeletion` is set, the state during deployment will be preserved(Keep partition > 0), enabling users to perform rollback operations. KeepWorkloadPausedOnRolloutDeletion featuregate.Feature = "KeepWorkloadPausedOnRolloutDeletion" + // MinReadySecondsStrategy enables the alpha Deployment MinReadySeconds rollout strategy. + MinReadySecondsStrategy featuregate.Feature = "MinReadySecondsStrategy" ) var defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{ @@ -40,6 +42,7 @@ var defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{ AdvancedDeploymentGate: {Default: false, PreRelease: featuregate.Alpha}, AppendServiceSelectorGate: {Default: false, PreRelease: featuregate.Alpha}, KeepWorkloadPausedOnRolloutDeletion: {Default: false, PreRelease: featuregate.Alpha}, + MinReadySecondsStrategy: {Default: false, PreRelease: featuregate.Alpha}, } func init() { diff --git a/pkg/feature/rollout_features_test.go b/pkg/feature/rollout_features_test.go new file mode 100644 index 00000000..55d8e953 --- /dev/null +++ b/pkg/feature/rollout_features_test.go @@ -0,0 +1,29 @@ +/* +Copyright 2026 The Kruise Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package feature + +import ( + "testing" + + utilfeature "github.com/openkruise/rollouts/pkg/util/feature" +) + +func TestMinReadySecondsStrategyDefaultDisabled(t *testing.T) { + if utilfeature.DefaultFeatureGate.Enabled(MinReadySecondsStrategy) { + t.Fatalf("feature gate %s is enabled by default", MinReadySecondsStrategy) + } +} diff --git a/pkg/webhook/workload/mutating/workload_update_handler.go b/pkg/webhook/workload/mutating/workload_update_handler.go index 5726a384..ced5d3cf 100644 --- a/pkg/webhook/workload/mutating/workload_update_handler.go +++ b/pkg/webhook/workload/mutating/workload_update_handler.go @@ -41,8 +41,10 @@ import ( appsv1alpha1 "github.com/openkruise/rollouts/api/v1alpha1" appsv1beta1 "github.com/openkruise/rollouts/api/v1beta1" + "github.com/openkruise/rollouts/pkg/feature" "github.com/openkruise/rollouts/pkg/util" utilclient "github.com/openkruise/rollouts/pkg/util/client" + utilfeature "github.com/openkruise/rollouts/pkg/util/feature" util2 "github.com/openkruise/rollouts/pkg/webhook/util" "github.com/openkruise/rollouts/pkg/webhook/util/configuration" ) @@ -238,6 +240,9 @@ func (h *WorkloadHandler) handleDeployment(newObj, oldObj *apps.Deployment) (boo // in rollout progressing if newObj.Annotations[util.InRolloutProgressingAnnotation] != "" { modified := false + if shouldSkipRecreateMutationForMinReady(rollout) { + return false, nil + } strategy := util.GetDeploymentStrategy(newObj) // partition if 
strings.EqualFold(string(strategy.RollingStyle), string(appsv1alpha1.PartitionRollingStyle)) { @@ -324,8 +329,10 @@ func (h *WorkloadHandler) handleDeployment(newObj, oldObj *apps.Deployment) (boo newObj.Labels[appsv1alpha1.DeploymentStableRevisionLabel] = stableRS.Labels[apps.DefaultDeploymentUniqueLabelKey] } - // need set workload paused = true - newObj.Spec.Paused = true + if !shouldSkipRecreateMutationForMinReady(rollout) { + // Partition/Recreate style disables the native Deployment controller. + newObj.Spec.Paused = true + } state := &util.RolloutState{RolloutName: rollout.Name} by, _ := json.Marshal(state) if newObj.Annotations == nil { @@ -451,6 +458,12 @@ func isEffectiveDeploymentRevisionChange(oldObj, newObj *apps.Deployment) bool { return true } +func shouldSkipRecreateMutationForMinReady(rollout *appsv1beta1.Rollout) bool { + return rollout.Spec.Strategy.Canary != nil && + rollout.Spec.Strategy.Canary.DeploymentStrategy == appsv1beta1.DeploymentStrategyMinReadySeconds && + utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) +} + func setDeploymentStrategyAnnotation(strategy appsv1alpha1.DeploymentStrategy, d *apps.Deployment) { strategyAnno, _ := json.Marshal(&strategy) d.Annotations[appsv1alpha1.DeploymentStrategyAnnotation] = string(strategyAnno) diff --git a/pkg/webhook/workload/mutating/workload_update_handler_test.go b/pkg/webhook/workload/mutating/workload_update_handler_test.go index 8d7a0732..788e3df9 100644 --- a/pkg/webhook/workload/mutating/workload_update_handler_test.go +++ b/pkg/webhook/workload/mutating/workload_update_handler_test.go @@ -44,7 +44,9 @@ import ( rolloutapi "github.com/openkruise/rollouts/api" appsv1alpha1 "github.com/openkruise/rollouts/api/v1alpha1" appsv1beta1 "github.com/openkruise/rollouts/api/v1beta1" + "github.com/openkruise/rollouts/pkg/feature" "github.com/openkruise/rollouts/pkg/util" + utilfeature "github.com/openkruise/rollouts/pkg/util/feature" 
"github.com/openkruise/rollouts/pkg/webhook/util/configuration" ) @@ -419,6 +421,32 @@ func TestHandlerDeployment(t *testing.T) { return rolloutDemo.DeepCopy() }, }, + { + name: "deployment image v1->v2, matched minready rollout keeps deployment unpaused", + getObjs: func() (*apps.Deployment, *apps.Deployment) { + oldObj := deploymentDemo.DeepCopy() + newObj := deploymentDemo.DeepCopy() + newObj.Spec.Template.Spec.Containers[0].Image = "echoserver:v2" + return oldObj, newObj + }, + expectObj: func() *apps.Deployment { + obj := deploymentDemo.DeepCopy() + obj.Spec.Template.Spec.Containers[0].Image = "echoserver:v2" + obj.Annotations[util.InRolloutProgressingAnnotation] = `{"rolloutName":"rollout-demo"}` + obj.Spec.Paused = false + return obj + }, + getRs: func() []*apps.ReplicaSet { + rs := rsDemo.DeepCopy() + return []*apps.ReplicaSet{rs} + }, + getRollout: func() *appsv1beta1.Rollout { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + obj := rolloutDemo.DeepCopy() + obj.Spec.Strategy.Canary.DeploymentStrategy = appsv1beta1.DeploymentStrategyMinReadySeconds + return obj + }, + }, { name: "deployment image v1->v2, no matched rollout", getObjs: func() (*apps.Deployment, *apps.Deployment) { @@ -541,6 +569,74 @@ func TestHandlerDeployment(t *testing.T) { return rolloutDemo.DeepCopy() }, }, + { + name: "minready deployment in progressing skips recreate mutation", + getObjs: func() (*apps.Deployment, *apps.Deployment) { + oldObj := deploymentDemo.DeepCopy() + oldObj.Spec.Template.Spec.Containers[0].Image = "echoserver:v2" + oldObj.Annotations[util.InRolloutProgressingAnnotation] = `{"rolloutName":"rollout-demo","RolloutDone":false}` + newObj := oldObj.DeepCopy() + newObj.Spec.Paused = false + newObj.Spec.Strategy.Type = apps.RollingUpdateDeploymentStrategyType + newObj.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{ + MaxUnavailable: &intstr.IntOrString{Type: intstr.Int, IntVal: 2}, + MaxSurge: 
&intstr.IntOrString{Type: intstr.Int, IntVal: 0}, + } + newObj.Annotations[appsv1alpha1.DeploymentStrategyAnnotation] = `{"rollingStyle":"Partition"}` + return oldObj, newObj + }, + expectObj: func() *apps.Deployment { + obj := deploymentDemo.DeepCopy() + obj.Spec.Template.Spec.Containers[0].Image = "echoserver:v2" + obj.Annotations[util.InRolloutProgressingAnnotation] = `{"rolloutName":"rollout-demo","RolloutDone":false}` + obj.Spec.Paused = false + obj.Spec.Strategy.Type = apps.RollingUpdateDeploymentStrategyType + obj.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{ + MaxUnavailable: &intstr.IntOrString{Type: intstr.Int, IntVal: 2}, + MaxSurge: &intstr.IntOrString{Type: intstr.Int, IntVal: 0}, + } + obj.Annotations[appsv1alpha1.DeploymentStrategyAnnotation] = `{"rollingStyle":"Partition"}` + return obj + }, + getRs: func() []*apps.ReplicaSet { + rs := rsDemo.DeepCopy() + return []*apps.ReplicaSet{rs} + }, + getRollout: func() *appsv1beta1.Rollout { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + obj := rolloutDemo.DeepCopy() + obj.Spec.Strategy.Canary.DeploymentStrategy = appsv1beta1.DeploymentStrategyMinReadySeconds + return obj + }, + }, + { + name: "minready deployment in progressing without strategy annotation keeps deployment unpaused", + getObjs: func() (*apps.Deployment, *apps.Deployment) { + oldObj := deploymentDemo.DeepCopy() + oldObj.Spec.Template.Spec.Containers[0].Image = "echoserver:v2" + oldObj.Annotations[util.InRolloutProgressingAnnotation] = `{"rolloutName":"rollout-demo","RolloutDone":false}` + newObj := oldObj.DeepCopy() + newObj.Spec.Paused = false + return oldObj, newObj + }, + expectObj: func() *apps.Deployment { + obj := deploymentDemo.DeepCopy() + obj.Spec.Template.Spec.Containers[0].Image = "echoserver:v2" + obj.Annotations[util.InRolloutProgressingAnnotation] = `{"rolloutName":"rollout-demo","RolloutDone":false}` + obj.Spec.Paused = false + return obj + }, + getRs: func() 
[]*apps.ReplicaSet { + rs := rsDemo.DeepCopy() + return []*apps.ReplicaSet{rs} + }, + getRollout: func() *appsv1beta1.Rollout { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + obj := rolloutDemo.DeepCopy() + obj.Spec.Strategy.Canary.DeploymentStrategy = appsv1beta1.DeploymentStrategyMinReadySeconds + return obj + }, + }, { name: "set deployment paused = false, matched rollout, in finalising, allow", getObjs: func() (*apps.Deployment, *apps.Deployment) { @@ -778,6 +874,23 @@ func TestHandlerDeployment(t *testing.T) { } } +func TestShouldSkipRecreateMutationForMinReady(t *testing.T) { + rollout := rolloutDemo.DeepCopy() + rollout.Spec.Strategy.Canary.DeploymentStrategy = appsv1beta1.DeploymentStrategyMinReadySeconds + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=false") + if shouldSkipRecreateMutationForMinReady(rollout) { + t.Fatalf("skip returned true while feature gate is disabled") + } + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + if !shouldSkipRecreateMutationForMinReady(rollout) { + t.Fatalf("skip returned false for MinReadySeconds with feature gate enabled") + } + rollout.Spec.Strategy.Canary.DeploymentStrategy = appsv1beta1.DeploymentStrategyRecreate + if shouldSkipRecreateMutationForMinReady(rollout) { + t.Fatalf("skip returned true for Recreate strategy") + } +} + func TestHandlerCloneSet(t *testing.T) { cases := []struct { name string diff --git a/test/e2e/deployment_minready_actions_test.go b/test/e2e/deployment_minready_actions_test.go new file mode 100644 index 00000000..2fcf09ce --- /dev/null +++ b/test/e2e/deployment_minready_actions_test.go @@ -0,0 +1,140 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + "context" + "strconv" + "strings" + "time" + + . "github.com/onsi/gomega" + apps "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/util/retry" + "k8s.io/utils/pointer" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/openkruise/rollouts/api/v1beta1" +) + +func finishMinReadyE2ERollout(namespace, name string) { + resumeMinReadyE2ERollout(namespace, name) + resumeMinReadyE2ERollout(namespace, name) + waitMinReadyE2ERolloutPhase(namespace, name, v1beta1.RolloutPhaseHealthy) + waitMinReadyE2EDeploymentRestored(namespace) +} + +func waitMinReadyE2EDeploymentReady(namespace string) { + Eventually(func() bool { + deployment := &apps.Deployment{} + key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} + if err := k8sClient.Get(context.TODO(), key, deployment); err != nil { + return false + } + return deployment.Status.ReadyReplicas == *deployment.Spec.Replicas + }, 10*time.Minute, time.Second).Should(BeTrue()) +} + +func waitMinReadyE2ERolloutStepPaused(namespace, name string, step int32) { + Eventually(func() bool { + rollout := &v1beta1.Rollout{} + key := types.NamespacedName{Namespace: namespace, Name: name} + Expect(k8sClient.Get(context.TODO(), key, rollout)).NotTo(HaveOccurred()) + return rollout.Status.CanaryStatus != nil && + rollout.Status.CanaryStatus.CurrentStepIndex == step && + rollout.Status.CanaryStatus.CurrentStepState == v1beta1.CanaryStepStatePaused + }, 10*time.Minute, 
time.Second).Should(BeTrue()) +} + +func patchMinReadyE2EDeploymentReplicas(namespace string, replicas int32) { + Expect(retry.RetryOnConflict(retry.DefaultRetry, func() error { + deployment := &apps.Deployment{} + key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} + if err := k8sClient.Get(context.TODO(), key, deployment); err != nil { + return err + } + deployment.Spec.Replicas = pointer.Int32(replicas) + return k8sClient.Update(context.TODO(), deployment) + })).NotTo(HaveOccurred()) +} + +func patchMinReadyE2EMaxUnavailable(namespace string, value int) { + Expect(retry.RetryOnConflict(retry.DefaultRetry, func() error { + deployment := &apps.Deployment{} + key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} + if err := k8sClient.Get(context.TODO(), key, deployment); err != nil { + return err + } + maxUnavailable := intstr.FromInt(value) + deployment.Spec.Strategy.RollingUpdate.MaxUnavailable = &maxUnavailable + return k8sClient.Update(context.TODO(), deployment) + })).NotTo(HaveOccurred()) +} + +func deleteMinReadyE2EOriginalAnnotation(namespace, key string) { + Expect(retry.RetryOnConflict(retry.DefaultRetry, func() error { + deployment := &apps.Deployment{} + namespacedName := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} + if err := k8sClient.Get(context.TODO(), namespacedName, deployment); err != nil { + return err + } + delete(deployment.Annotations, key) + return k8sClient.Update(context.TODO(), deployment) + })).NotTo(HaveOccurred()) +} + +func restartMinReadyE2EControllerManager() { + Expect(retry.RetryOnConflict(retry.DefaultRetry, func() error { + deployment := &apps.Deployment{} + key := types.NamespacedName{Namespace: "kruise-rollout", Name: "kruise-rollout-controller-manager"} + if err := k8sClient.Get(context.TODO(), key, deployment); err != nil { + return err + } + if deployment.Spec.Template.Annotations == nil { + deployment.Spec.Template.Annotations = 
map[string]string{} + } + deployment.Spec.Template.Annotations["rollouts.kruise.io/minready-e2e-restart"] = strconv.FormatInt(time.Now().UnixNano(), 10) + return k8sClient.Update(context.TODO(), deployment) + })).NotTo(HaveOccurred()) + waitMinReadyE2EWebhookEndpointReady() +} + +func expectMinReadyE2EDeploymentVersion(namespace, version string) { + deployment := &apps.Deployment{} + key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} + Expect(k8sClient.Get(context.TODO(), key, deployment)).Should(Succeed()) + got := "" + for _, env := range deployment.Spec.Template.Spec.Containers[0].Env { + if env.Name == "NODE_NAME" { + got = env.Value + } + } + Expect(got).Should(Equal(version)) +} + +func expectMinReadyE2ENoVersion2Pods(namespace string) { + pods := &corev1.PodList{} + Expect(k8sClient.List(context.TODO(), pods, client.InNamespace(namespace))).Should(Succeed()) + for _, pod := range pods.Items { + for _, container := range pod.Spec.Containers { + Expect(strings.Contains(container.Image, "version2")).Should(BeFalse()) + } + } +} diff --git a/test/e2e/deployment_minready_helpers_test.go b/test/e2e/deployment_minready_helpers_test.go new file mode 100644 index 00000000..f72d39a3 --- /dev/null +++ b/test/e2e/deployment_minready_helpers_test.go @@ -0,0 +1,220 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + "context" + "fmt" + "time" + + . "github.com/onsi/ginkgo" + . 
"github.com/onsi/gomega" + apps "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + policyv1 "k8s.io/api/policy/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/util/retry" + "k8s.io/utils/pointer" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/openkruise/rollouts/api/v1beta1" + partitiondeployment "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/deployment" +) + +const minReadyE2EDeploymentName = "minready-demo" + +func newMinReadyE2EDeployment(namespace string) *apps.Deployment { + maxUnavailable := intstr.FromString("25%") + maxSurge := intstr.FromInt(1) + return &apps.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: minReadyE2EDeploymentName, + Namespace: namespace, + Labels: map[string]string{"app": minReadyE2EDeploymentName}, + }, + Spec: apps.DeploymentSpec{ + Replicas: pointer.Int32(5), + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": minReadyE2EDeploymentName}}, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"app": minReadyE2EDeploymentName}}, + Spec: corev1.PodSpec{Containers: []corev1.Container{{ + Name: "echoserver", + Image: "cilium/echoserver:latest", + ImagePullPolicy: corev1.PullIfNotPresent, + Env: []corev1.EnvVar{{Name: "NODE_NAME", Value: "version1"}}, + }}}, + }, + Strategy: apps.DeploymentStrategy{ + Type: apps.RollingUpdateDeploymentStrategyType, + RollingUpdate: &apps.RollingUpdateDeployment{ + MaxUnavailable: &maxUnavailable, + MaxSurge: &maxSurge, + }, + }, + }, + } +} + +func newMinReadyE2ERollout(namespace string) *v1beta1.Rollout { + return &v1beta1.Rollout{ + ObjectMeta: metav1.ObjectMeta{Name: "minready-rollout", Namespace: namespace}, + Spec: v1beta1.RolloutSpec{ + WorkloadRef: v1beta1.ObjectRef{APIVersion: "apps/v1", Kind: "Deployment", Name: 
minReadyE2EDeploymentName}, + Strategy: v1beta1.RolloutStrategy{Canary: &v1beta1.CanaryStrategy{ + DeploymentStrategy: v1beta1.DeploymentStrategyMinReadySeconds, + EnableExtraWorkloadForCanary: false, + Steps: []v1beta1.CanaryStep{ + {Replicas: intstrPtr(intstr.FromString("20%")), Pause: v1beta1.RolloutPause{}}, + {Replicas: intstrPtr(intstr.FromString("50%")), Pause: v1beta1.RolloutPause{}}, + {Replicas: intstrPtr(intstr.FromString("100%")), Pause: v1beta1.RolloutPause{Duration: pointer.Int32(0)}}, + }, + }}, + }, + } +} + +func newMinReadyE2EPDB(namespace string) *policyv1.PodDisruptionBudget { + return &policyv1.PodDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{Name: "minready-pdb", Namespace: namespace}, + Spec: policyv1.PodDisruptionBudgetSpec{ + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": minReadyE2EDeploymentName}}, + MinAvailable: intstrPtr(intstr.FromInt(4)), + }, + } +} + +func intstrPtr(value intstr.IntOrString) *intstr.IntOrString { + return &value +} + +func createMinReadyE2EObject(object client.Object) { + By(fmt.Sprintf("create %T %s/%s", object, object.GetNamespace(), object.GetName())) + Expect(k8sClient.Create(context.TODO(), object)).NotTo(HaveOccurred()) +} + +func updateMinReadyE2EDeploymentVersion(namespace, version string) { + Expect(retry.RetryOnConflict(retry.DefaultRetry, func() error { + deployment := &apps.Deployment{} + key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} + if err := k8sClient.Get(context.TODO(), key, deployment); err != nil { + return err + } + deployment.Spec.Template.Spec.Containers[0].Env = mergeEnvVar( + deployment.Spec.Template.Spec.Containers[0].Env, + corev1.EnvVar{Name: "NODE_NAME", Value: version}, + ) + return k8sClient.Update(context.TODO(), deployment) + })).NotTo(HaveOccurred()) +} + +func resumeMinReadyE2ERollout(namespace, name string) { + resumedStep := int32(-1) + Eventually(func() bool { + rollout := &v1beta1.Rollout{} + key := 
types.NamespacedName{Namespace: namespace, Name: name} + Expect(k8sClient.Get(context.TODO(), key, rollout)).NotTo(HaveOccurred()) + if rollout.Status.Phase == v1beta1.RolloutPhaseHealthy { + return true + } + if rollout.Status.CanaryStatus == nil || rollout.Status.CanaryStatus.CurrentStepState != v1beta1.CanaryStepStatePaused { + return false + } + resumedStep = rollout.Status.CanaryStatus.CurrentStepIndex + body := fmt.Sprintf(`{"status":{"canaryStatus":{"currentStepState":"%s"}}}`, v1beta1.CanaryStepStateReady) + return k8sClient.Status().Patch(context.TODO(), rollout, client.RawPatch(types.MergePatchType, []byte(body))) == nil + }, 2*time.Minute, time.Second).Should(BeTrue()) + if resumedStep < 0 { + return + } + Eventually(func() bool { + rollout := &v1beta1.Rollout{} + key := types.NamespacedName{Namespace: namespace, Name: name} + Expect(k8sClient.Get(context.TODO(), key, rollout)).NotTo(HaveOccurred()) + if rollout.Status.Phase == v1beta1.RolloutPhaseHealthy { + return true + } + return rollout.Status.CanaryStatus != nil && + (rollout.Status.CanaryStatus.CurrentStepIndex != resumedStep || + rollout.Status.CanaryStatus.CurrentStepState != v1beta1.CanaryStepStatePaused) + }, 2*time.Minute, time.Second).Should(BeTrue()) +} + +func waitMinReadyE2ERolloutPhase(namespace, name string, phase v1beta1.RolloutPhase) { + Eventually(func() bool { + rollout := &v1beta1.Rollout{} + key := types.NamespacedName{Namespace: namespace, Name: name} + Expect(k8sClient.Get(context.TODO(), key, rollout)).NotTo(HaveOccurred()) + return rollout.Status.Phase == phase + }, 10*time.Minute, time.Second).Should(BeTrue()) +} + +func waitMinReadyE2EDeploymentInflated(namespace string) { + Eventually(func() bool { + deployment := &apps.Deployment{} + key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} + Expect(k8sClient.Get(context.TODO(), key, deployment)).NotTo(HaveOccurred()) + return deployment.Spec.Strategy.Type == 
apps.RollingUpdateDeploymentStrategyType && + deployment.Spec.MinReadySeconds == partitiondeployment.InflatedMinReadySeconds && + deployment.Spec.Strategy.RollingUpdate != nil + }, 5*time.Minute, time.Second).Should(BeTrue()) +} + +func waitMinReadyE2EDeploymentRestored(namespace string) { + Eventually(func() bool { + deployment := &apps.Deployment{} + key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} + Expect(k8sClient.Get(context.TODO(), key, deployment)).NotTo(HaveOccurred()) + return deployment.Spec.MinReadySeconds == 0 && + deployment.Spec.Strategy.Type == apps.RollingUpdateDeploymentStrategyType && + deployment.Annotations[partitiondeployment.AnnotationOriginalMinReadySeconds] == "" + }, 10*time.Minute, time.Second).Should(BeTrue()) +} + +func waitMinReadyE2EBatchCondition(namespace, name, reason string) { + Eventually(func() bool { + release := &v1beta1.BatchRelease{} + key := types.NamespacedName{Namespace: namespace, Name: name} + if err := k8sClient.Get(context.TODO(), key, release); err != nil { + if apierrors.IsNotFound(err) { + return false + } + Expect(err).NotTo(HaveOccurred()) + } + for _, condition := range release.Status.Conditions { + if condition.Reason == reason { + return true + } + } + return false + }, 5*time.Minute, time.Second).Should(BeTrue()) +} + +func startMinReadyE2ERollout(namespace string) *v1beta1.Rollout { + rollout := newMinReadyE2ERollout(namespace) + deployment := newMinReadyE2EDeployment(namespace) + createMinReadyE2EObject(rollout) + createMinReadyE2EObject(deployment) + waitMinReadyE2EDeploymentReady(namespace) + waitMinReadyE2ERolloutPhase(namespace, rollout.Name, v1beta1.RolloutPhaseHealthy) + updateMinReadyE2EDeploymentVersion(namespace, "version2") + waitMinReadyE2EDeploymentInflated(namespace) + waitMinReadyE2EBatchCondition(namespace, rollout.Name, "MinReadyInitialized") + return rollout +} diff --git a/test/e2e/deployment_minready_pdb_test.go b/test/e2e/deployment_minready_pdb_test.go 
new file mode 100644
index 00000000..4f5b8d45
--- /dev/null
+++ b/test/e2e/deployment_minready_pdb_test.go
@@ -0,0 +1,79 @@
+/*
+Copyright 2026 The Kruise Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2e
+
+import (
+	"context"
+	"time"
+
+	. "github.com/onsi/ginkgo"
+	. "github.com/onsi/gomega"
+	apps "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	policyv1 "k8s.io/api/policy/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	"github.com/openkruise/rollouts/api/v1beta1"
+	partitiondeployment "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/deployment"
+)
+
+// This suite creates a PodDisruptionBudget next to the test Deployment and expects the
+// BatchRelease to report MinReadyDegradedPDBIncompatible without inflating the Deployment.
+var _ = SIGDescribe("Deployment MinReadySeconds PDB", func() {
+	var namespace string
+
+	BeforeEach(func() {
+		namespace = randomNamespaceName("deployment-minready-pdb")
+		ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespace}}
+		Expect(k8sClient.Create(context.TODO(), ns)).Should(Succeed())
+	})
+
+	AfterEach(func() {
+		// Best-effort cleanup: DeleteAllOf errors are ignored on purpose; only the
+		// namespace deletion itself is asserted.
+		_ = k8sClient.DeleteAllOf(context.TODO(), &v1beta1.BatchRelease{}, client.InNamespace(namespace))
+		_ = k8sClient.DeleteAllOf(context.TODO(), &v1beta1.Rollout{}, client.InNamespace(namespace))
+		_ = k8sClient.DeleteAllOf(context.TODO(), &policyv1.PodDisruptionBudget{}, client.InNamespace(namespace))
+		_ = k8sClient.DeleteAllOf(context.TODO(), &apps.Deployment{}, client.InNamespace(namespace))
+		Expect(k8sClient.Delete(context.TODO(), &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespace}})).Should(Succeed())
+		time.Sleep(3 * time.Second)
+	})
+
+	KruiseDescribe("MinReadySeconds PDB guard", func() {
+		It("records degraded status and leaves Deployment strategy untouched", func() {
+			rollout := newMinReadyE2ERollout(namespace)
+			deployment := newMinReadyE2EDeployment(namespace)
+			pdb := newMinReadyE2EPDB(namespace)
+			createMinReadyE2EObject(rollout)
+			createMinReadyE2EObject(deployment)
+			createMinReadyE2EObject(pdb)
+
+			waitMinReadyE2EDeploymentReady(namespace)
+			waitMinReadyE2ERolloutPhase(namespace, rollout.Name, v1beta1.RolloutPhaseHealthy)
+			updateMinReadyE2EDeploymentVersion(namespace, "version2")
+			waitMinReadyE2EBatchCondition(namespace, rollout.Name, "MinReadyDegradedPDBIncompatible")
+
+			got := &apps.Deployment{}
+			Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(deployment), got)).Should(Succeed())
+			Expect(got.Spec.Strategy.Type).Should(Equal(apps.RollingUpdateDeploymentStrategyType))
+			// Equal compares with reflect.DeepEqual, so convert the constant to the field's
+			// int32 type; a differently typed constant would otherwise never be "equal".
+			Expect(got.Spec.MinReadySeconds).ShouldNot(Equal(int32(partitiondeployment.InflatedMinReadySeconds)))
+		})
+	})
+})
diff --git a/test/e2e/deployment_minready_scenarios_helper_test.go b/test/e2e/deployment_minready_scenarios_helper_test.go
new file mode 100644
index 00000000..7c540c7a
--- /dev/null
+++ b/test/e2e/deployment_minready_scenarios_helper_test.go
@@ -0,0 +1,170 @@
+/*
+Copyright 2026 The Kruise Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2e
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	. "github.com/onsi/gomega"
+	apps "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/apimachinery/pkg/util/intstr"
+	"k8s.io/client-go/util/retry"
+	"k8s.io/utils/pointer"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	"github.com/openkruise/rollouts/api/v1beta1"
+	partitiondeployment "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/deployment"
+)
+
+// waitMinReadyE2EDeploymentReplicas polls until both spec.replicas and
+// status.replicas of the test Deployment equal replicas.
+func waitMinReadyE2EDeploymentReplicas(namespace string, replicas int32) {
+	Eventually(func() bool {
+		deployment := &apps.Deployment{}
+		key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName}
+		Expect(k8sClient.Get(context.TODO(), key, deployment)).Should(Succeed())
+		// spec.replicas is a pointer: report "not yet" instead of panicking when it is nil.
+		desired := deployment.Spec.Replicas
+		return desired != nil && *desired == replicas && deployment.Status.Replicas == replicas
+	}, 5*time.Minute, time.Second).Should(BeTrue())
+}
+
+// deleteMinReadyE2ERollout fetches the named Rollout and deletes it; any API
+// error fails the spec.
+func deleteMinReadyE2ERollout(namespace, name string) {
+	rollout := &v1beta1.Rollout{}
+	key := types.NamespacedName{Namespace: namespace, Name: name}
+	Expect(k8sClient.Get(context.TODO(), key, rollout)).Should(Succeed())
+	Expect(k8sClient.Delete(context.TODO(), rollout)).Should(Succeed())
+}
+
+// waitMinReadyE2ERolloutDeleted polls until the API server answers NotFound for
+// the Rollout. Any other Get error is retried rather than being mistaken for a
+// completed deletion.
+func waitMinReadyE2ERolloutDeleted(namespace, name string) {
+	Eventually(func() bool {
+		key := types.NamespacedName{Namespace: namespace, Name: name}
+		err := k8sClient.Get(context.TODO(), key, &v1beta1.Rollout{})
+		return err != nil && client.IgnoreNotFound(err) == nil
+	}, 5*time.Minute, time.Second).Should(BeTrue())
+}
+
+// restoreMinReadyE2EOriginalAnnotation writes annotation key=value onto the test
+// Deployment, re-reading the object and retrying when the update conflicts.
+func restoreMinReadyE2EOriginalAnnotation(namespace, key, value string) {
+	Expect(retry.RetryOnConflict(retry.DefaultRetry, func() error {
+		deployment := &apps.Deployment{}
+		namespacedName := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName}
+		if err := k8sClient.Get(context.TODO(), namespacedName, deployment); err != nil {
+			return err
+		}
+		if deployment.Annotations == nil {
+			deployment.Annotations = map[string]string{}
+		}
+		deployment.Annotations[key] = value
+		return k8sClient.Update(context.TODO(), deployment)
+	})).NotTo(HaveOccurred())
+}
+
+// waitMinReadyE2EWebhookEndpointReady polls until the Endpoints object of the
+// kruise-rollout webhook Service lists at least one address.
+func waitMinReadyE2EWebhookEndpointReady() {
+	Eventually(func() bool {
+		endpoints := &corev1.Endpoints{}
+		key := types.NamespacedName{Namespace: "kruise-rollout", Name: "kruise-rollout-webhook-service"}
+		if err := k8sClient.Get(context.TODO(), key, endpoints); err != nil {
+			return false
+		}
+		for _, subset := range endpoints.Subsets {
+			if len(subset.Addresses) > 0 {
+				return true
+			}
+		}
+		return false
+	}, 5*time.Minute, time.Second).Should(BeTrue())
+}
+
+// waitMinReadyE2EBatchMetricCondition is a thin alias for
+// waitMinReadyE2EBatchCondition: it waits on the BatchRelease condition reason
+// and does not read any metrics itself.
+func waitMinReadyE2EBatchMetricCondition(namespace, name, reason string) {
+	waitMinReadyE2EBatchCondition(namespace, name, reason)
+}
+
+// waitMinReadyE2EEventReason polls until an Event with the given reason exists
+// in the namespace.
+func waitMinReadyE2EEventReason(namespace, reason string) {
+	Eventually(func() bool {
+		events := &corev1.EventList{}
+		Expect(k8sClient.List(context.TODO(), events, client.InNamespace(namespace))).Should(Succeed())
+		for _, event := range events.Items {
+			if event.Reason == reason {
+				return true
+			}
+		}
+		return false
+	}, 5*time.Minute, time.Second).Should(BeTrue())
+}
+
+// makeMinReadyE2ERolloutWithReplicas returns the default MinReady test Rollout with
+// its canary steps replaced by one step per entry in values (for example "25%").
+func makeMinReadyE2ERolloutWithReplicas(namespace string, values ...string) *v1beta1.Rollout {
+	rollout := newMinReadyE2ERollout(namespace)
+	steps := make([]v1beta1.CanaryStep, 0, len(values))
+	for _, value := range values {
+		steps = append(steps, v1beta1.CanaryStep{Replicas: intstrFromStringPtr(value), Pause: v1beta1.RolloutPause{}})
+	}
+	rollout.Spec.Strategy.Canary.Steps = steps
+	return rollout
+}
+
+// intstrFromStringPtr returns a pointer to a string-typed IntOrString holding value.
+func intstrFromStringPtr(value string) *intstr.IntOrString {
+	parsed := intstr.FromString(value)
+	return &parsed
+}
+
+// expectMinReadyE2EInflatedMaxUnavailable polls until the IntVal of the
+// Deployment's rollingUpdate.maxUnavailable equals want.
+func expectMinReadyE2EInflatedMaxUnavailable(namespace string, want int32) {
+	Eventually(func() bool {
+		deployment := &apps.Deployment{}
+		key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName}
+		Expect(k8sClient.Get(context.TODO(), key, deployment)).Should(Succeed())
+		// rollingUpdate and maxUnavailable are both optional pointers; keep polling while unset.
+		rollingUpdate := deployment.Spec.Strategy.RollingUpdate
+		return rollingUpdate != nil && rollingUpdate.MaxUnavailable != nil && rollingUpdate.MaxUnavailable.IntVal == want
+	}, 5*time.Minute, time.Second).Should(BeTrue(), fmt.Sprintf("want maxUnavailable %d", want))
+}
+
+// expectMinReadyE2EOriginalAnnotationAbsent asserts that the original
+// minReadySeconds annotation is missing or empty on the test Deployment.
+func expectMinReadyE2EOriginalAnnotationAbsent(namespace string) {
+	deployment := &apps.Deployment{}
+	key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName}
+	Expect(k8sClient.Get(context.TODO(), key, deployment)).Should(Succeed())
+	Expect(deployment.Annotations[partitiondeployment.AnnotationOriginalMinReadySeconds]).Should(Equal(""))
+}
+
+// setMinReadyE2EInitialReplicas sets spec.replicas on an in-memory Deployment.
+func setMinReadyE2EInitialReplicas(deployment *apps.Deployment, replicas int32) {
+	deployment.Spec.Replicas = pointer.Int32(replicas)
+}
diff --git a/test/e2e/deployment_minready_test.go b/test/e2e/deployment_minready_test.go
new file mode 100644
index 00000000..3aa20a32
--- /dev/null
+++ b/test/e2e/deployment_minready_test.go
@@ -0,0 +1,145 @@
+/*
+Copyright 2026 The Kruise Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2e
+
+import (
+	"context"
+	"time"
+
+	. "github.com/onsi/ginkgo"
+	. "github.com/onsi/gomega"
+	apps "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	"github.com/openkruise/rollouts/api/v1beta1"
+	partitiondeployment "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/deployment"
+)
+
+// This suite exercises the Deployment MinReadySeconds rollout strategy end to end:
+// normal completion, rollback, controller restart, scaling, deletion and degraded cases.
+var _ = SIGDescribe("Deployment MinReadySeconds", func() {
+	var namespace string
+
+	BeforeEach(func() {
+		namespace = randomNamespaceName("deployment-minready")
+		ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespace}}
+		Expect(k8sClient.Create(context.TODO(), ns)).Should(Succeed())
+	})
+
+	AfterEach(func() {
+		// Best-effort cleanup: DeleteAllOf errors are ignored on purpose; only the
+		// namespace deletion itself is asserted.
+		_ = k8sClient.DeleteAllOf(context.TODO(), &v1beta1.BatchRelease{}, client.InNamespace(namespace))
+		_ = k8sClient.DeleteAllOf(context.TODO(), &v1beta1.Rollout{}, client.InNamespace(namespace))
+		_ = k8sClient.DeleteAllOf(context.TODO(), &apps.Deployment{}, client.InNamespace(namespace))
+		Expect(k8sClient.Delete(context.TODO(), &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespace}})).Should(Succeed())
+		time.Sleep(3 * time.Second)
+	})
+
+	KruiseDescribe("MinReadySeconds deployment rollout", func() {
+		It("TC1 normal rollout keeps RollingUpdate and restores original fields", func() {
+			rollout := startMinReadyE2ERollout(namespace)
+			finishMinReadyE2ERollout(namespace, rollout.Name)
+		})
+
+		It("TC2 rollback returns to the stable template", func() {
+			rollout := newMinReadyE2ERollout(namespace)
+			deployment := newMinReadyE2EDeployment(namespace)
+			createMinReadyE2EObject(rollout)
+			createMinReadyE2EObject(deployment)
+			waitMinReadyE2EDeploymentReady(namespace)
+			waitMinReadyE2ERolloutPhase(namespace, rollout.Name, v1beta1.RolloutPhaseHealthy)
+
+			// Begin the update, then switch the template back to version1 while MinReady is inflated.
+			updateMinReadyE2EDeploymentVersion(namespace, "version2")
+			waitMinReadyE2EDeploymentInflated(namespace)
+			updateMinReadyE2EDeploymentVersion(namespace, "version1")
+			waitMinReadyE2ERolloutPhase(namespace, rollout.Name, v1beta1.RolloutPhaseHealthy)
+
+			expectMinReadyE2EDeploymentVersion(namespace, "version1")
+		})
+
+		It("TC3 controller restart resumes from the persisted MinReadySeconds state", func() {
+			rollout := startMinReadyE2ERollout(namespace)
+			waitMinReadyE2ERolloutStepPaused(namespace, rollout.Name, 1)
+			restartMinReadyE2EControllerManager()
+
+			waitMinReadyE2EDeploymentInflated(namespace)
+			waitMinReadyE2ERolloutStepPaused(namespace, rollout.Name, 1)
+			finishMinReadyE2ERollout(namespace, rollout.Name)
+		})
+
+		It("TC4 scale changes remain safe while rollout is active", func() {
+			rollout := makeMinReadyE2ERolloutWithReplicas(namespace, "25%", "50%", "100%")
+			deployment := newMinReadyE2EDeployment(namespace)
+			setMinReadyE2EInitialReplicas(deployment, 4)
+			createMinReadyE2EObject(rollout)
+			createMinReadyE2EObject(deployment)
+			waitMinReadyE2EDeploymentReady(namespace)
+			waitMinReadyE2ERolloutPhase(namespace, rollout.Name, v1beta1.RolloutPhaseHealthy)
+
+			updateMinReadyE2EDeploymentVersion(namespace, "version2")
+			waitMinReadyE2EDeploymentInflated(namespace)
+			waitMinReadyE2ERolloutStepPaused(namespace, rollout.Name, 1)
+			// Scale from 4 to 8 replicas while the rollout is paused on step 1.
+			patchMinReadyE2EDeploymentReplicas(namespace, 8)
+			waitMinReadyE2EDeploymentReplicas(namespace, 8)
+			waitMinReadyE2ERolloutStepPaused(namespace, rollout.Name, 1)
+			resumeMinReadyE2ERollout(namespace, rollout.Name)
+			expectMinReadyE2EInflatedMaxUnavailable(namespace, 4)
+			finishMinReadyE2ERollout(namespace, rollout.Name)
+		})
+
+		It("TC5 deleting Rollout restores annotations and lets native RollingUpdate continue", func() {
+			rollout := startMinReadyE2ERollout(namespace)
+			deleteMinReadyE2ERollout(namespace, rollout.Name)
+
+			waitMinReadyE2ERolloutDeleted(namespace, rollout.Name)
+			waitMinReadyE2EDeploymentRestored(namespace)
+			expectMinReadyE2EOriginalAnnotationAbsent(namespace)
+		})
+
+		It("TC6 GitOps drift records degraded status and preserves the external value", func() {
+			rollout := startMinReadyE2ERollout(namespace)
+			waitMinReadyE2ERolloutStepPaused(namespace, rollout.Name, 1)
+			// Simulate an external writer changing maxUnavailable behind the controller's back.
+			patchMinReadyE2EMaxUnavailable(namespace, 5)
+			resumeMinReadyE2ERollout(namespace, rollout.Name)
+
+			waitMinReadyE2EBatchMetricCondition(namespace, rollout.Name, "MinReadyDegradedDriftDetected")
+			waitMinReadyE2EEventReason(namespace, "MinReadyDegradedDriftDetected")
+			expectMinReadyE2EInflatedMaxUnavailable(namespace, 5)
+		})
+
+		It("TC7 missing annotation blocks finalize until the operator restores it", func() {
+			rollout := startMinReadyE2ERollout(namespace)
+			deleteMinReadyE2EOriginalAnnotation(namespace, partitiondeployment.AnnotationOriginalMaxSurge)
+			// NOTE(review): resume is issued twice in a row; presumably one call per paused
+			// step of the default test Rollout - confirm against newMinReadyE2ERollout.
+			resumeMinReadyE2ERollout(namespace, rollout.Name)
+			resumeMinReadyE2ERollout(namespace, rollout.Name)
+			waitMinReadyE2EBatchCondition(namespace, rollout.Name, "MinReadyDegradedMissingAnnotations")
+			waitMinReadyE2EEventReason(namespace, "MinReadyDegradedMissingAnnotations")
+
+			restoreMinReadyE2EOriginalAnnotation(namespace, partitiondeployment.AnnotationOriginalMaxSurge, "1")
+			waitMinReadyE2ERolloutPhase(namespace, rollout.Name, v1beta1.RolloutPhaseHealthy)
+			waitMinReadyE2EDeploymentRestored(namespace)
+		})
+	})
+})
diff --git a/test/integration/concurrency_test.go b/test/integration/concurrency_test.go
new file mode 100644
index 00000000..63ea00d9
--- /dev/null
+++ b/test/integration/concurrency_test.go
@@ -0,0 +1,114 @@
+/*
+Copyright 2026 The Kruise Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package integration
+
+import (
+	"strings"
+	"testing"
+
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/util/intstr"
+	"k8s.io/client-go/tools/record"
+	"k8s.io/utils/pointer"
+
+	"github.com/openkruise/rollouts/api/v1beta1"
+	partitiondeployment "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/deployment"
+)
+
+// TestDeploymentMinReadyConcurrentScaleUsesLatestReplicas expects UpgradeBatch to set
+// maxUnavailable to 10 when the Deployment has 20 replicas and the release is on batch index 1.
+func TestDeploymentMinReadyConcurrentScaleUsesLatestReplicas(t *testing.T) {
+	setIntegrationMinReadyFeatureGate(t, true)
+	release := newIntegrationMinReadyRelease()
+	release.Status.CanaryStatus.CurrentBatch = 1
+	deployment := newInflatedIntegrationDeployment()
+	deployment.Spec.Replicas = pointer.Int32(20)
+	deployment.Status.Replicas = 20
+	deployment.Status.UpdatedReplicas = 10
+	deployment.Status.ReadyReplicas = 10
+	recorder := record.NewFakeRecorder(20)
+	cli := newIntegrationClient(release, deployment)
+	status := release.Status.DeepCopy()
+	control := newIntegrationMinReadyControl(cli, recorder, release, status, deployment.Name)
+
+	if err := control.UpgradeBatch(); err != nil {
+		t.Fatalf("UpgradeBatch failed: %v", err)
+	}
+
+	got := fetchIntegrationDeployment(t, cli, deployment)
+	if unavailable := got.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.IntVal != 10 {
+		t.Fatalf("maxUnavailable = %v, want 10 after scale to 20 replicas", unavailable)
+	}
+	assertIntegrationCondition(t, status, v1beta1.RolloutConditionMinReadyBatching, corev1.ConditionTrue, "MinReadyBatching")
+}
+
+// TestDeploymentMinReadyConcurrentGitOpsDriftIsDegraded expects UpgradeBatch to fail with a
+// drift error, keep the externally set maxUnavailable (6) and record the degraded
+// condition and event.
+func TestDeploymentMinReadyConcurrentGitOpsDriftIsDegraded(t *testing.T) {
+	setIntegrationMinReadyFeatureGate(t, true)
+	release := newIntegrationMinReadyRelease()
+	release.Status.CanaryStatus.CurrentBatch = 1
+	deployment := newInflatedIntegrationDeployment()
+	driftedMaxUnavailable := intstr.FromInt(6)
+	deployment.Spec.Strategy.RollingUpdate.MaxUnavailable = &driftedMaxUnavailable
+	deployment.Status.Replicas = 10
+	deployment.Status.UpdatedReplicas = 5
+	deployment.Status.ReadyReplicas = 5
+	recorder := record.NewFakeRecorder(20)
+	cli := newIntegrationClient(release, deployment)
+	status := release.Status.DeepCopy()
+	control := newIntegrationMinReadyControl(cli, recorder, release, status, deployment.Name)
+
+	err := control.UpgradeBatch()
+	if err == nil || !strings.Contains(err.Error(), partitiondeployment.EventDegradedDriftDetected) {
+		t.Fatalf("UpgradeBatch error = %v, want drift detected", err)
+	}
+
+	got := fetchIntegrationDeployment(t, cli, deployment)
+	if unavailable := got.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.IntVal != 6 {
+		t.Fatalf("maxUnavailable = %v, want drifted value preserved", unavailable)
+	}
+	assertIntegrationCondition(t, status, v1beta1.RolloutConditionMinReadyDegraded, corev1.ConditionTrue, "MinReadyDegradedDriftDetected")
+	assertIntegrationEvent(t, recorder, "MinReadyDegradedDriftDetected")
+}
+
+// TestDeploymentMinReadyConcurrentAnnotationDeletionBlocksFinalize expects Finalize to fail
+// while the original maxSurge annotation is missing, leaving minReadySeconds inflated and
+// recording the degraded condition and event.
+func TestDeploymentMinReadyConcurrentAnnotationDeletionBlocksFinalize(t *testing.T) {
+	setIntegrationMinReadyFeatureGate(t, true)
+	release := newIntegrationMinReadyRelease()
+	deployment := newInflatedIntegrationDeployment()
+	delete(deployment.Annotations, partitiondeployment.AnnotationOriginalMaxSurge)
+	recorder := record.NewFakeRecorder(20)
+	cli := newIntegrationClient(release, deployment)
+	status := release.Status.DeepCopy()
+	control := newIntegrationMinReadyControl(cli, recorder, release, status, deployment.Name)
+
+	err := control.Finalize()
+	if err == nil || !strings.Contains(err.Error(), partitiondeployment.AnnotationOriginalMaxSurge) {
+		t.Fatalf("Finalize error = %v, want missing annotation", err)
+	}
+
+	got := fetchIntegrationDeployment(t, cli, deployment)
+	if got.Spec.MinReadySeconds != partitiondeployment.InflatedMinReadySeconds {
+		t.Fatalf("minReadySeconds = %d, want inflated value preserved", got.Spec.MinReadySeconds)
+	}
+	assertIntegrationCondition(t, status, v1beta1.RolloutConditionMinReadyDegraded, corev1.ConditionTrue, "MinReadyDegradedMissingAnnotations")
+	assertIntegrationEvent(t, recorder, "MinReadyDegradedMissingAnnotations")
+}
diff --git a/test/integration/deployment_minready_test.go b/test/integration/deployment_minready_test.go
new file mode 100644
index 00000000..3d83c0e4
--- /dev/null
+++ b/test/integration/deployment_minready_test.go
@@ -0,0 +1,177 @@
+/*
+Copyright 2026 The Kruise Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package integration
+
+import (
+	"strings"
+	"testing"
+
+	corev1 "k8s.io/api/core/v1"
+	policyv1 "k8s.io/api/policy/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/tools/record"
+
+	"github.com/openkruise/rollouts/api/v1beta1"
+	partitiondeployment "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/deployment"
+)
+
+// TestDeploymentMinReadyControlPlaneInitialize expects Initialize to inflate the Deployment,
+// record the four original-value annotations and emit the MinReadyInitialized condition and event.
+func TestDeploymentMinReadyControlPlaneInitialize(t *testing.T) {
+	setIntegrationMinReadyFeatureGate(t, true)
+	release := newIntegrationMinReadyRelease()
+	deployment := newIntegrationDeployment()
+	recorder := record.NewFakeRecorder(20)
+	cli := newIntegrationClient(release, deployment)
+	status := release.Status.DeepCopy()
+	control := newIntegrationMinReadyControl(cli, recorder, release, status, deployment.Name)
+
+	if err := control.Initialize(); err != nil {
+		t.Fatalf("Initialize failed: %v", err)
+	}
+
+	got := fetchIntegrationDeployment(t, cli, deployment)
+	assertInflatedDeployment(t, got)
+	assertOriginalAnnotation(t, got, partitiondeployment.AnnotationOriginalMinReadySeconds, "5")
+	assertOriginalAnnotation(t, got, partitiondeployment.AnnotationOriginalProgressDeadlineSeconds, "60")
+	assertOriginalAnnotation(t, got, partitiondeployment.AnnotationOriginalMaxUnavailable, "25%")
+	assertOriginalAnnotation(t, got, partitiondeployment.AnnotationOriginalMaxSurge, "1")
+	assertIntegrationCondition(t, status, v1beta1.RolloutConditionMinReadyInitialized, corev1.ConditionTrue, "MinReadyInitialized")
+	assertIntegrationCondition(t, status, v1beta1.RolloutConditionMinReadyDegraded, corev1.ConditionFalse, "MinReadyHealthy")
+	assertIntegrationEvent(t, recorder, "MinReadyInitialized")
+}
+
+// TestDeploymentMinReadyControlPlaneRejectsFeatureGateDisabled expects Initialize to fail and
+// leave minReadySeconds unchanged while the MinReadySecondsStrategy gate is off.
+func TestDeploymentMinReadyControlPlaneRejectsFeatureGateDisabled(t *testing.T) {
+	setIntegrationMinReadyFeatureGate(t, false)
+	release := newIntegrationMinReadyRelease()
+	deployment := newIntegrationDeployment()
+	recorder := record.NewFakeRecorder(20)
+	cli := newIntegrationClient(release, deployment)
+	status := release.Status.DeepCopy()
+	control := newIntegrationMinReadyControl(cli, recorder, release, status, deployment.Name)
+
+	err := control.Initialize()
+	if err == nil || !strings.Contains(err.Error(), "feature gate is disabled") {
+		t.Fatalf("Initialize error = %v, want feature gate disabled", err)
+	}
+
+	got := fetchIntegrationDeployment(t, cli, deployment)
+	if got.Spec.MinReadySeconds != deployment.Spec.MinReadySeconds {
+		t.Fatalf("minReadySeconds = %d, want unchanged %d", got.Spec.MinReadySeconds, deployment.Spec.MinReadySeconds)
+	}
+	assertIntegrationCondition(t, status, v1beta1.RolloutConditionMinReadyDegraded, corev1.ConditionTrue, "MinReadyFeatureGateDisabled")
+	assertIntegrationEvent(t, recorder, "MinReadyFeatureGateDisabled")
+}
+
+// TestDeploymentMinReadyControlPlaneRejectsCoveringPDB expects Initialize to fail and leave
+// minReadySeconds unchanged when a PodDisruptionBudget selects the Deployment's pods.
+func TestDeploymentMinReadyControlPlaneRejectsCoveringPDB(t *testing.T) {
+	setIntegrationMinReadyFeatureGate(t, true)
+	release := newIntegrationMinReadyRelease()
+	deployment := newIntegrationDeployment()
+	pdb := &policyv1.PodDisruptionBudget{
+		ObjectMeta: metav1.ObjectMeta{Name: "demo-pdb", Namespace: deployment.Namespace},
+		Spec: policyv1.PodDisruptionBudgetSpec{
+			Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "demo"}},
+		},
+	}
+	recorder := record.NewFakeRecorder(20)
+	cli := newIntegrationClient(release, deployment, pdb)
+	status := release.Status.DeepCopy()
+	control := newIntegrationMinReadyControl(cli, recorder, release, status, deployment.Name)
+
+	err := control.Initialize()
+	if err == nil || !strings.Contains(err.Error(), partitiondeployment.EventDegradedPDBIncompatible) {
+		t.Fatalf("Initialize error = %v, want PDB incompatible", err)
+	}
+
+	got := fetchIntegrationDeployment(t, cli, deployment)
+	if got.Spec.MinReadySeconds != deployment.Spec.MinReadySeconds {
+		t.Fatalf("minReadySeconds = %d, want unchanged %d", got.Spec.MinReadySeconds, deployment.Spec.MinReadySeconds)
+	}
+	assertIntegrationCondition(t, status, v1beta1.RolloutConditionMinReadyDegraded, corev1.ConditionTrue, "MinReadyDegradedPDBIncompatible")
+	assertIntegrationEvent(t, recorder, "MinReadyDegradedPDBIncompatible")
+}
+
+// TestDeploymentMinReadyControlPlaneUpgradeBatchUsesReadyReplicas expects batch index 1 of a
+// 10-replica Deployment to yield maxUnavailable=5 and to be reported ready with 5 updated, ready pods.
+func TestDeploymentMinReadyControlPlaneUpgradeBatchUsesReadyReplicas(t *testing.T) {
+	setIntegrationMinReadyFeatureGate(t, true)
+	release := newIntegrationMinReadyRelease()
+	release.Status.CanaryStatus.CurrentBatch = 1
+	deployment := newInflatedIntegrationDeployment()
+	deployment.Status.Replicas = 10
+	deployment.Status.UpdatedReplicas = 5
+	deployment.Status.ReadyReplicas = 5
+	recorder := record.NewFakeRecorder(20)
+	cli := newIntegrationClient(release, deployment)
+	status := release.Status.DeepCopy()
+	control := newIntegrationMinReadyControl(cli, recorder, release, status, deployment.Name)
+
+	if err := control.UpgradeBatch(); err != nil {
+		t.Fatalf("UpgradeBatch failed: %v", err)
+	}
+	if err := control.EnsureBatchPodsReadyAndLabeled(); err != nil {
+		t.Fatalf("EnsureBatchPodsReadyAndLabeled failed: %v", err)
+	}
+
+	got := fetchIntegrationDeployment(t, cli, deployment)
+	if unavailable := got.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.IntVal != 5 {
+		t.Fatalf("maxUnavailable = %v, want 5", unavailable)
+	}
+	assertIntegrationCondition(t, status, v1beta1.RolloutConditionMinReadyBatching, corev1.ConditionTrue, "MinReadyBatchReady")
+	assertIntegrationEvent(t, recorder, "MinReadyBatchReady")
+}
+
+// TestDeploymentMinReadyControlPlaneFinalizeRestoresOriginalFields expects Finalize to write the
+// annotated original values back, drop every original-value annotation and record MinReadyFinalized.
+func TestDeploymentMinReadyControlPlaneFinalizeRestoresOriginalFields(t *testing.T) {
+	setIntegrationMinReadyFeatureGate(t, true)
+	release := newIntegrationMinReadyRelease()
+	deployment := newInflatedIntegrationDeployment()
+	recorder := record.NewFakeRecorder(20)
+	cli := newIntegrationClient(release, deployment)
+	status := release.Status.DeepCopy()
+	control := newIntegrationMinReadyControl(cli, recorder, release, status, deployment.Name)
+
+	if err := control.Finalize(); err != nil {
+		t.Fatalf("Finalize failed: %v", err)
+	}
+
+	got := fetchIntegrationDeployment(t, cli, deployment)
+	if got.Spec.MinReadySeconds != 5 {
+		t.Fatalf("minReadySeconds = %d, want 5", got.Spec.MinReadySeconds)
+	}
+	if got.Spec.ProgressDeadlineSeconds == nil || *got.Spec.ProgressDeadlineSeconds != 60 {
+		t.Fatalf("progressDeadlineSeconds = %v, want 60", got.Spec.ProgressDeadlineSeconds)
+	}
+	if unavailable := got.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.StrVal != "25%" {
+		t.Fatalf("maxUnavailable = %v, want 25%%", unavailable)
+	}
+	if surge := got.Spec.Strategy.RollingUpdate.MaxSurge; surge == nil || surge.IntVal != 1 {
+		t.Fatalf("maxSurge = %v, want 1", surge)
+	}
+	for _, key := range partitiondeployment.AllOriginalAnnotations {
+		if _, ok := got.Annotations[key]; ok {
+			t.Fatalf("annotation %s still exists", key)
+		}
+	}
+	assertIntegrationCondition(t, status, v1beta1.RolloutConditionMinReadyFinalized, corev1.ConditionTrue, "MinReadyFinalized")
+	assertIntegrationEvent(t, recorder, "MinReadyFinalized")
+}
diff --git a/test/integration/minready_helpers_test.go b/test/integration/minready_helpers_test.go
new file mode 100644
index 00000000..6a5430b2
--- /dev/null
+++ b/test/integration/minready_helpers_test.go
@@ -0,0 +1,284 @@
+/*
+Copyright 2026 The Kruise Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package integration
+
+import (
+	"context"
+	"fmt"
+	"strings"
+	"testing"
+
+	apps "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	policyv1 "k8s.io/api/policy/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/apimachinery/pkg/util/intstr"
+	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
+	"k8s.io/client-go/tools/record"
+	"k8s.io/utils/pointer"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/client/fake"
+
+	rolloutapi "github.com/openkruise/rollouts/api"
+	"github.com/openkruise/rollouts/api/v1beta1"
+	"github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle"
+	partitiondeployment "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/deployment"
+	"github.com/openkruise/rollouts/pkg/feature"
+	utilfeature "github.com/openkruise/rollouts/pkg/util/feature"
+)
+
+// integrationScheme is the scheme given to the fake client; init registers the
+// apps, core, policy and rollout API types into it.
+var integrationScheme = runtime.NewScheme()
+
+func init() {
+	utilruntime.Must(apps.AddToScheme(integrationScheme))
+	utilruntime.Must(corev1.AddToScheme(integrationScheme))
+	utilruntime.Must(policyv1.AddToScheme(integrationScheme))
+	utilruntime.Must(rolloutapi.AddToScheme(integrationScheme))
+}
+
+// setIntegrationMinReadyFeatureGate forces the MinReadySecondsStrategy feature
+// gate to enabled for the calling test and restores the previous value when the
+// test finishes, so the process-wide gate does not leak between tests.
+func setIntegrationMinReadyFeatureGate(t *testing.T, enabled bool) {
+	t.Helper()
+	gate := utilfeature.DefaultMutableFeatureGate
+	previous := gate.Enabled(feature.MinReadySecondsStrategy)
+	set := func(value bool) error {
+		return gate.Set(fmt.Sprintf("%s=%t", feature.MinReadySecondsStrategy, value))
+	}
+	if err := set(enabled); err != nil {
+		t.Fatalf("set feature gate %s=%t failed: %v", feature.MinReadySecondsStrategy, enabled, err)
+	}
+	t.Cleanup(func() {
+		if err := set(previous); err != nil {
+			t.Errorf("restore feature gate %s=%t failed: %v", feature.MinReadySecondsStrategy, previous, err)
+		}
+	})
+}
+
+// newIntegrationMinReadyRelease builds a BatchRelease for Deployment "demo" that uses
+// the MinReadySeconds deployment strategy with three batches: 20%, 50% and 100%.
+func newIntegrationMinReadyRelease() *v1beta1.BatchRelease {
+	return &v1beta1.BatchRelease{
+		TypeMeta: metav1.TypeMeta{APIVersion: v1beta1.GroupVersion.String(), Kind: "BatchRelease"},
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "demo-release",
+			Namespace: "default",
+		},
+		Spec: v1beta1.BatchReleaseSpec{
+			WorkloadRef: v1beta1.ObjectRef{APIVersion: apps.SchemeGroupVersion.String(), Kind: "Deployment", Name: "demo"},
+			ReleasePlan: v1beta1.ReleasePlan{
+				RollingStyle:       v1beta1.PartitionRollingStyle,
+				DeploymentStrategy: v1beta1.DeploymentStrategyMinReadySeconds,
+				Batches: []v1beta1.ReleaseBatch{
+					{CanaryReplicas: intstr.FromString("20%")},
+					{CanaryReplicas: intstr.FromString("50%")},
+					{CanaryReplicas: intstr.FromString("100%")},
+				},
+			},
+		},
+		Status: v1beta1.BatchReleaseStatus{
+			Phase:          v1beta1.RolloutPhasePreparing,
+			StableRevision: "stable",
+			UpdateRevision: "updated",
+		},
+	}
+}
+
+// newIntegrationDeployment builds the 10-replica RollingUpdate Deployment "demo" in its
+// original shape: minReadySeconds=5, progressDeadlineSeconds=60, maxUnavailable=25%,
+// maxSurge=1.
+func newIntegrationDeployment() *apps.Deployment {
+	progressDeadlineSeconds := int32(60)
+	maxUnavailable := intstr.FromString("25%")
+	maxSurge := intstr.FromInt(1)
+	return &apps.Deployment{
+		TypeMeta: metav1.TypeMeta{APIVersion: apps.SchemeGroupVersion.String(), Kind: "Deployment"},
+		ObjectMeta: metav1.ObjectMeta{
+			Name:            "demo",
+			Namespace:       "default",
+			ResourceVersion: "1",
+			Labels:          map[string]string{"app": "demo"},
+		},
+		Spec: apps.DeploymentSpec{
+			Replicas:                pointer.Int32(10),
+			Selector:                &metav1.LabelSelector{MatchLabels: map[string]string{"app": "demo"}},
+			Template:                newIntegrationPodTemplate(),
+			MinReadySeconds:         5,
+			ProgressDeadlineSeconds: &progressDeadlineSeconds,
+			Strategy: apps.DeploymentStrategy{
+				Type: apps.RollingUpdateDeploymentStrategyType,
+				RollingUpdate: &apps.RollingUpdateDeployment{
+					MaxUnavailable: &maxUnavailable,
+					MaxSurge:       &maxSurge,
+				},
+			},
+		},
+		Status: apps.DeploymentStatus{
+			Replicas:          10,
+			ReadyReplicas:     10,
+			UpdatedReplicas:   0,
+			AvailableReplicas: 10,
+		},
+	}
+}
+
+// newInflatedIntegrationDeployment returns newIntegrationDeployment rewritten to the
+// inflated state: inflated minReadySeconds/progressDeadlineSeconds/maxSurge,
+// maxUnavailable=0, and annotations holding the original values.
+func newInflatedIntegrationDeployment() *apps.Deployment {
+	deployment := newIntegrationDeployment()
+	progressDeadlineSeconds := partitiondeployment.InflatedProgressDeadlineSeconds
+	maxUnavailable := intstr.FromInt(0)
+	maxSurge := intstr.FromInt(int(partitiondeployment.InflatedMaxSurgeInt))
+	deployment.Spec.MinReadySeconds = partitiondeployment.InflatedMinReadySeconds
+	deployment.Spec.ProgressDeadlineSeconds = &progressDeadlineSeconds
+	deployment.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{
+		MaxUnavailable: &maxUnavailable,
+		MaxSurge:       &maxSurge,
+	}
+	deployment.Annotations = map[string]string{
+		partitiondeployment.AnnotationOriginalMinReadySeconds:         "5",
+		partitiondeployment.AnnotationOriginalProgressDeadlineSeconds: "60",
+		partitiondeployment.AnnotationOriginalMaxUnavailable:          "25%",
+		partitiondeployment.AnnotationOriginalMaxSurge:                "1",
+	}
+	return deployment
+}
+
+// newIntegrationPodTemplate returns the single-container pod template labelled app=demo.
+func newIntegrationPodTemplate() corev1.PodTemplateSpec {
+	return corev1.PodTemplateSpec{
+		ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"app": "demo"}},
+		Spec: corev1.PodSpec{Containers: []corev1.Container{{
+			Name:  "main",
+			Image: "busybox:v2",
+		}}},
+	}
+}
+
+// newIntegrationClient builds a fake client over integrationScheme seeded with objects,
+// with the status subresource enabled for BatchRelease.
+func newIntegrationClient(objects ...client.Object) client.Client {
+	return fake.NewClientBuilder().
+		WithScheme(integrationScheme).
+		WithObjects(objects...).
+		WithStatusSubresource(&v1beta1.BatchRelease{}).
+		Build()
+}
+
+// newIntegrationMinReadyControl wires NewMinReadyController into a partition-style
+// control plane for the given release and Deployment, exposing only the methods the
+// tests call.
+func newIntegrationMinReadyControl(
+	cli client.Client,
+	recorder record.EventRecorder,
+	release *v1beta1.BatchRelease,
+	status *v1beta1.BatchReleaseStatus,
+	deploymentName string,
+) interface {
+	Initialize() error
+	UpgradeBatch() error
+	EnsureBatchPodsReadyAndLabeled() error
+	Finalize() error
+} {
+	return partitionstyle.NewControlPlane(
+		partitiondeployment.NewMinReadyController,
+		cli,
+		recorder,
+		release,
+		status,
+		types.NamespacedName{Namespace: release.Namespace, Name: deploymentName},
+		apps.SchemeGroupVersion.WithKind("Deployment"),
+	)
+}
+
+// fetchIntegrationDeployment re-reads deployment from cli, failing the test on error.
+func fetchIntegrationDeployment(t *testing.T, cli client.Client, deployment *apps.Deployment) *apps.Deployment {
+	t.Helper()
+	got := &apps.Deployment{}
+	key := types.NamespacedName{Namespace: deployment.Namespace, Name: deployment.Name}
+	if err := cli.Get(context.TODO(), key, got); err != nil {
+		t.Fatalf("Get deployment failed: %v", err)
+	}
+	return got
+}
+
+// assertInflatedDeployment fails the test unless deployment is RollingUpdate with the
+// inflated minReadySeconds, maxUnavailable=0 and the inflated maxSurge.
+func assertInflatedDeployment(t *testing.T, deployment *apps.Deployment) {
+	t.Helper()
+	if deployment.Spec.Strategy.Type != apps.RollingUpdateDeploymentStrategyType {
+		t.Fatalf("strategy.type = %q, want RollingUpdate", deployment.Spec.Strategy.Type)
+	}
+	if deployment.Spec.MinReadySeconds != partitiondeployment.InflatedMinReadySeconds {
+		t.Fatalf("minReadySeconds = %d, want %d", deployment.Spec.MinReadySeconds, partitiondeployment.InflatedMinReadySeconds)
+	}
+	if unavailable := deployment.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.IntVal != 0 {
+		t.Fatalf("maxUnavailable = %v, want 0", unavailable)
+	}
+	if surge := deployment.Spec.Strategy.RollingUpdate.MaxSurge; surge == nil || surge.IntVal != partitiondeployment.InflatedMaxSurgeInt {
+		t.Fatalf("maxSurge = %v, want %d", surge, partitiondeployment.InflatedMaxSurgeInt)
+	}
+}
+
+// assertOriginalAnnotation fails the test unless annotation key on deployment equals want.
+func assertOriginalAnnotation(t *testing.T, deployment *apps.Deployment, key, want string) {
+	t.Helper()
+	if got := deployment.Annotations[key]; got != want {
+		t.Fatalf("annotation %s = %q, want %q", key, got, want)
+	}
+}
+
+// assertIntegrationCondition fails the test unless status holds a condition matching
+// type, status and reason together.
+func assertIntegrationCondition(
+	t *testing.T,
+	status *v1beta1.BatchReleaseStatus,
+	condType v1beta1.RolloutConditionType,
+	condStatus corev1.ConditionStatus,
+	reason string,
+) {
+	t.Helper()
+	for _, condition := range status.Conditions {
+		if condition.Type == condType && condition.Status == condStatus && condition.Reason == reason {
+			return
+		}
+	}
+	t.Fatalf("condition %s with %s/%s not found in %#v", condType, condStatus, reason, status.Conditions)
+}
+
+// assertIntegrationEvent drains the recorder's buffered events until one contains want,
+// failing the test when the buffer runs empty first. Events read before the match are
+// consumed and cannot be asserted on again.
+func assertIntegrationEvent(t *testing.T, recorder *record.FakeRecorder, want string) {
+	t.Helper()
+	for {
+		select {
+		case event := <-recorder.Events:
+			if strings.Contains(event, want) {
+				return
+			}
+		default:
+			t.Fatalf("event %q not recorded", want)
+		}
+	}
+}
From 0f6383039ea9f356ea43f303db42d86a4da76a96 Mon Sep 17 00:00:00 2001
From: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com>
Date: Sat, 30 May 2026 00:30:53 +0800
Subject: [PATCH 02/22] fix: correct MinReady batch-ready semantics and feature-gate fallback
CalculateBatchContext now counts only updated-revision Ready pods, with a matching-ReplicaSet readyReplicas fallback, instead of Deployment status.readyReplicas which also counted old-revision Ready pods and could mark a batch ready before the new pods were actually ready. List owned pods when RolloutID is set, and require non-empty pods once a batch label is expected. The executor falls back to the Recreate controller when the MinReadySecondsStrategy feature gate is disabled, matching the webhook skip. Add negative unit and integration coverage for these cases. Signed-off-by: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> --- .../batchrelease_controller_test.go | 15 ++- .../batchrelease/batchrelease_executor.go | 8 +- .../batchrelease/context/context.go | 5 +- .../batchrelease/context/context_test.go | 9 ++ .../partitionstyle/deployment/control_test.go | 1 + .../deployment/minready_batch_context.go | 91 +++++++++++++++++ .../deployment/minready_control.go | 33 +++---- .../deployment/minready_control_test.go | 99 +++++++++++++++++-- .../deployment/minready_test_helpers_test.go | 85 ++++++++++++++++ test/integration/deployment_minready_test.go | 27 ++++- test/integration/minready_helpers_test.go | 31 ++++++ 11 files changed, 370 insertions(+), 34 deletions(-) create mode 100644 pkg/controller/batchrelease/control/partitionstyle/deployment/minready_batch_context.go diff --git a/pkg/controller/batchrelease/batchrelease_controller_test.go b/pkg/controller/batchrelease/batchrelease_controller_test.go index e2016793..63e7619a 100644 --- a/pkg/controller/batchrelease/batchrelease_controller_test.go +++ b/pkg/controller/batchrelease/batchrelease_controller_test.go @@ -829,7 +829,7 @@ func TestReconcile_Deployment(t *testing.T) { } } -func TestExecutorRoutesMinReadyDeploymentStrategy(t *testing.T) { +func TestExecutorFallsBackToRecreateWhenMinReadyFeatureGateDisabled(t *testing.T) { _ = 
utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=false") release := releaseDeploy.DeepCopy() release.Spec.ReleasePlan.RollingStyle = v1beta1.PartitionRollingStyle @@ -846,9 +846,16 @@ func TestExecutorRoutesMinReadyDeploymentStrategy(t *testing.T) { if err != nil { t.Fatalf("getReleaseController failed: %v", err) } - err = controller.Initialize() - if err == nil || !strings.Contains(err.Error(), "feature gate is disabled") { - t.Fatalf("Initialize error = %v, want MinReady feature gate disabled", err) + if err := controller.Initialize(); err != nil { + t.Fatalf("Initialize failed: %v", err) + } + + got := &apps.Deployment{} + if err := cli.Get(context.TODO(), client.ObjectKeyFromObject(deployment), got); err != nil { + t.Fatalf("Get deployment failed: %v", err) + } + if got.Spec.Strategy.Type != apps.RecreateDeploymentStrategyType { + t.Fatalf("strategy.type = %q, want Recreate fallback when feature gate disabled", got.Spec.Strategy.Type) } } diff --git a/pkg/controller/batchrelease/batchrelease_executor.go b/pkg/controller/batchrelease/batchrelease_executor.go index aa03d46b..b62627f5 100644 --- a/pkg/controller/batchrelease/batchrelease_executor.go +++ b/pkg/controller/batchrelease/batchrelease_executor.go @@ -45,8 +45,10 @@ import ( partitiondeployment "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/deployment" "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/nativedaemonset" "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/statefulset" + "github.com/openkruise/rollouts/pkg/feature" "github.com/openkruise/rollouts/pkg/util" "github.com/openkruise/rollouts/pkg/util/errors" + utilfeature "github.com/openkruise/rollouts/pkg/util/feature" ) const ( @@ -244,10 +246,14 @@ func (r *Executor) getReleaseController(release *v1beta1.BatchRelease, newStatus return partitionstyle.NewControlPlane(cloneset.NewController, r.client, 
r.recorder, release, newStatus, targetKey, gvk), nil } if targetRef.APIVersion == apps.SchemeGroupVersion.String() && targetRef.Kind == reflect.TypeOf(apps.Deployment{}).Name() { - if release.Spec.ReleasePlan.DeploymentStrategy == v1beta1.DeploymentStrategyMinReadySeconds { + if release.Spec.ReleasePlan.DeploymentStrategy == v1beta1.DeploymentStrategyMinReadySeconds && + utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) { klog.InfoS("Using Deployment MinReadySeconds partition-style release controller for this batch release", "workload name", targetKey.Name, "namespace", targetKey.Namespace) return partitionstyle.NewControlPlane(partitiondeployment.NewMinReadyController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil } + if release.Spec.ReleasePlan.DeploymentStrategy == v1beta1.DeploymentStrategyMinReadySeconds { + klog.InfoS("MinReadySecondsStrategy feature gate disabled, using Recreate deployment controller", "workload name", targetKey.Name, "namespace", targetKey.Namespace) + } klog.InfoS("Using Deployment partition-style release controller for this batch release", "workload name", targetKey.Name, "namespace", targetKey.Namespace) return partitionstyle.NewControlPlane(partitiondeployment.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil } diff --git a/pkg/controller/batchrelease/context/context.go b/pkg/controller/batchrelease/context/context.go index 2078c2d3..e34d256c 100644 --- a/pkg/controller/batchrelease/context/context.go +++ b/pkg/controller/batchrelease/context/context.go @@ -97,9 +97,12 @@ func (bc *BatchContext) IsBatchReady() error { // batchLabelSatisfied return true if the expected batch label has been patched func batchLabelSatisfied(pods []*corev1.Pod, rolloutID string, targetCount int32) bool { - if rolloutID == "" || len(pods) == 0 { + if rolloutID == "" || targetCount <= 0 { return true } + if len(pods) == 0 { + return false + } patchedCount := util.WrappedPodCount(pods, func(pod 
*corev1.Pod) bool { if !pod.DeletionTimestamp.IsZero() { return false diff --git a/pkg/controller/batchrelease/context/context_test.go b/pkg/controller/batchrelease/context/context_test.go index 3d3941cd..e1515c20 100644 --- a/pkg/controller/batchrelease/context/context_test.go +++ b/pkg/controller/batchrelease/context/context_test.go @@ -125,6 +125,15 @@ func TestIsBatchReady(t *testing.T) { updatedReady: 5, isReady: false, }, + "false: rollout-id, no pods listed": { + release: r(p(intstr.FromInt(1)), "1", "version-1"), + pods: nil, + labelDesired: 5, + desired: 5, + updated: 5, + updatedReady: 5, + isReady: false, + }, } for name, cs := range cases { diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/control_test.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/control_test.go index 5d3ab77e..7116d7c8 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/control_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/control_test.go @@ -151,6 +151,7 @@ var ( func init() { apps.AddToScheme(scheme) + corev1.AddToScheme(scheme) rolloutapi.AddToScheme(scheme) kruiseappsv1alpha1.AddToScheme(scheme) } diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_batch_context.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_batch_context.go new file mode 100644 index 00000000..23207823 --- /dev/null +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_batch_context.go @@ -0,0 +1,91 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package deployment + +import ( + "context" + "fmt" + + apps "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/openkruise/rollouts/pkg/util" +) + +func (mc *MinReadyControl) minReadyUpdatedReadyReplicas(updateRevision string) (int32, error) { + if len(mc.pods) > 0 { + return countUpdatedReadyPods(mc.pods, updateRevision), nil + } + return mc.updatedReadyReplicasFromReplicaSet(updateRevision) +} + +func countUpdatedReadyPods(pods []*corev1.Pod, updateRevision string) int32 { + return int32(util.WrappedPodCount(pods, func(pod *corev1.Pod) bool { + if !pod.DeletionTimestamp.IsZero() { + return false + } + return util.IsConsistentWithRevision(pod.Labels, updateRevision) && util.IsPodReady(pod) + })) +} + +func (mc *MinReadyControl) updatedReadyReplicasFromReplicaSet(updateRevision string) (int32, error) { + rsList := &apps.ReplicaSetList{} + if err := mc.client.List(context.TODO(), rsList, client.InNamespace(mc.object.Namespace)); err != nil { + return 0, fmt.Errorf("list ReplicaSets: %w", err) + } + + var ready int32 + for i := range rsList.Items { + rs := &rsList.Items[i] + if !metav1.IsControlledBy(rs, mc.object) { + continue + } + if !replicaSetMatchesUpdateRevision(rs, updateRevision) { + continue + } + ready += rs.Status.ReadyReplicas + } + return ready, nil +} + +func replicaSetMatchesUpdateRevision(rs *apps.ReplicaSet, updateRevision string) bool { + if util.ComputeHash(&rs.Spec.Template, nil) == updateRevision { 
+ return true + } + return util.IsConsistentWithRevision(rs.Labels, updateRevision) +} + +func minReadyDesiredUpdatedReplicas(desired intstr.IntOrString, deployment *apps.Deployment) (int32, error) { + if deployment.Spec.Replicas == nil { + return 0, fmt.Errorf("deployment replicas is nil") + } + replicas := int(*deployment.Spec.Replicas) + target, err := intstr.GetScaledValueFromIntOrPercent(&desired, replicas, true) + if err != nil { + return 0, err + } + if target < 0 { + return 0, nil + } + if target > replicas { + return int32(replicas), nil + } + return int32(target), nil +} diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go index c4a879e7..c956fb05 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go @@ -115,19 +115,30 @@ func (mc *MinReadyControl) Finalize(_ *v1beta1.BatchRelease) error { } func (mc *MinReadyControl) CalculateBatchContext(release *v1beta1.BatchRelease) (*batchcontext.BatchContext, error) { + rolloutID := release.Spec.ReleasePlan.RolloutID + if rolloutID != "" { + if _, err := mc.ListOwnedPods(); err != nil { + return nil, fmt.Errorf("MinReadyControl.CalculateBatchContext: %w", err) + } + } + currentBatch := release.Status.CanaryStatus.CurrentBatch desiredPartition := release.Spec.ReleasePlan.Batches[currentBatch].CanaryReplicas desiredUpdatedReplicas, err := minReadyDesiredUpdatedReplicas(desiredPartition, mc.object) if err != nil { return nil, fmt.Errorf("MinReadyControl.CalculateBatchContext: %w", err) } + updatedReadyReplicas, err := mc.minReadyUpdatedReadyReplicas(release.Status.UpdateRevision) + if err != nil { + return nil, fmt.Errorf("MinReadyControl.CalculateBatchContext: %w", err) + } return &batchcontext.BatchContext{ - RolloutID: release.Spec.ReleasePlan.RolloutID, + 
RolloutID: rolloutID, CurrentBatch: currentBatch, UpdateRevision: release.Status.UpdateRevision, Replicas: mc.Replicas, UpdatedReplicas: mc.object.Status.UpdatedReplicas, - UpdatedReadyReplicas: mc.object.Status.ReadyReplicas, + UpdatedReadyReplicas: updatedReadyReplicas, PlannedUpdatedReplicas: desiredUpdatedReplicas, DesiredUpdatedReplicas: desiredUpdatedReplicas, DesiredPartition: desiredPartition, @@ -221,24 +232,6 @@ func ensureInflatedDeploymentStrategy(deployment *apps.Deployment) error { return nil } -func minReadyDesiredUpdatedReplicas(desired intstr.IntOrString, deployment *apps.Deployment) (int32, error) { - if deployment.Spec.Replicas == nil { - return 0, fmt.Errorf("deployment replicas is nil") - } - replicas := int(*deployment.Spec.Replicas) - target, err := intstr.GetScaledValueFromIntOrPercent(&desired, replicas, true) - if err != nil { - return 0, err - } - if target < 0 { - return 0, nil - } - if target > replicas { - return int32(replicas), nil - } - return int32(target), nil -} - type originalDeploymentStrategy struct { minReadySeconds *int32 progressDeadlineSeconds *int32 diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go index 58e17b73..3db89d99 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go @@ -27,6 +27,7 @@ import ( batchcontext "github.com/openkruise/rollouts/pkg/controller/batchrelease/context" "github.com/openkruise/rollouts/pkg/feature" + "github.com/openkruise/rollouts/pkg/util" utilfeature "github.com/openkruise/rollouts/pkg/util/feature" ) @@ -182,15 +183,17 @@ func TestMinReadyUpgradeBatchUpdatesMaxUnavailableOnly(t *testing.T) { } } -func TestMinReadyCalculateBatchContextUsesReadyReplicas(t *testing.T) { +func 
TestMinReadyCalculateBatchContextUsesUpdatedReadyReplicas(t *testing.T) { release := releaseDemo.DeepCopy() release.Status.CanaryStatus.CurrentBatch = 1 - release.Status.UpdateRevision = "version-2" + updateRevision := util.ComputeHash(&deploymentDemo.Spec.Template, nil) + release.Status.UpdateRevision = updateRevision deployment := newMinReadyDeployment() deployment.Status.Replicas = 10 deployment.Status.UpdatedReplicas = 5 deployment.Status.ReadyReplicas = 5 - control := newBuiltMinReadyControl(t, deployment) + rs := newMinReadyReplicaSet(deployment, updateRevision, 5, 5) + control := newBuiltMinReadyControl(t, deployment, rs) ctx, err := control.CalculateBatchContext(release) if err != nil { @@ -201,21 +204,102 @@ func TestMinReadyCalculateBatchContextUsesReadyReplicas(t *testing.T) { t.Fatalf("desired/planned = %d/%d, want 5/5", ctx.DesiredUpdatedReplicas, ctx.PlannedUpdatedReplicas) } if ctx.UpdatedReadyReplicas != 5 { - t.Fatalf("UpdatedReadyReplicas = %d, want ReadyReplicas 5", ctx.UpdatedReadyReplicas) + t.Fatalf("UpdatedReadyReplicas = %d, want updated RS ready 5", ctx.UpdatedReadyReplicas) } if err := ctx.IsBatchReady(); err != nil { t.Fatalf("IsBatchReady failed: %v", err) } } +func TestMinReadyCalculateBatchContextIgnoresOldReadyPods(t *testing.T) { + release := releaseDemo.DeepCopy() + release.Status.CanaryStatus.CurrentBatch = 1 + updateRevision := util.ComputeHash(&deploymentDemo.Spec.Template, nil) + release.Status.UpdateRevision = updateRevision + deployment := newMinReadyDeployment() + deployment.Status.Replicas = 10 + deployment.Status.UpdatedReplicas = 5 + deployment.Status.ReadyReplicas = 10 + rs := newMinReadyReplicaSet(deployment, updateRevision, 5, 1) + control := newBuiltMinReadyControl(t, deployment, rs) + + ctx, err := control.CalculateBatchContext(release) + if err != nil { + t.Fatalf("CalculateBatchContext failed: %v", err) + } + if ctx.UpdatedReadyReplicas != 1 { + t.Fatalf("UpdatedReadyReplicas = %d, want 1 from updated RS only", 
ctx.UpdatedReadyReplicas) + } + if err := ctx.IsBatchReady(); err == nil { + t.Fatalf("IsBatchReady succeeded, want not ready error") + } +} + +func TestMinReadyCalculateBatchContextRequiresPodListingForRolloutID(t *testing.T) { + release := releaseDemo.DeepCopy() + release.Status.CanaryStatus.CurrentBatch = 1 + release.Spec.ReleasePlan.RolloutID = "rollout-1" + updateRevision := util.ComputeHash(&deploymentDemo.Spec.Template, nil) + release.Status.UpdateRevision = updateRevision + deployment := newMinReadyDeployment() + deployment.Status.Replicas = 10 + deployment.Status.UpdatedReplicas = 5 + deployment.Status.ReadyReplicas = 5 + rs := newMinReadyReplicaSet(deployment, updateRevision, 5, 5) + control := newBuiltMinReadyControl(t, deployment, rs) + + ctx, err := control.CalculateBatchContext(release) + if err != nil { + t.Fatalf("CalculateBatchContext failed: %v", err) + } + if len(ctx.Pods) != 0 { + t.Fatalf("Pods = %d, want 0 when no pods exist in cluster", len(ctx.Pods)) + } + if err := ctx.IsBatchReady(); err == nil { + t.Fatalf("IsBatchReady succeeded, want batch label not satisfied") + } +} + +func TestMinReadyCalculateBatchContextCountsReadyPodsWhenListed(t *testing.T) { + release := releaseDemo.DeepCopy() + release.Status.CanaryStatus.CurrentBatch = 1 + release.Spec.ReleasePlan.RolloutID = "rollout-1" + updateRevision := util.ComputeHash(&deploymentDemo.Spec.Template, nil) + release.Status.UpdateRevision = updateRevision + deployment := newMinReadyDeployment() + deployment.Status.Replicas = 10 + deployment.Status.UpdatedReplicas = 5 + deployment.Status.ReadyReplicas = 10 + rs := newMinReadyReplicaSet(deployment, updateRevision, 5, 1) + pods := newMinReadyUpdatedPods(deployment, rs, updateRevision, "rollout-1", 5, 3) + control := newBuiltMinReadyControl(t, deployment, rs, pods[0], pods[1], pods[2], pods[3], pods[4]) + + ctx, err := control.CalculateBatchContext(release) + if err != nil { + t.Fatalf("CalculateBatchContext failed: %v", err) + } + if 
len(ctx.Pods) != 5 { + t.Fatalf("Pods = %d, want 5 listed pods", len(ctx.Pods)) + } + if ctx.UpdatedReadyReplicas != 3 { + t.Fatalf("UpdatedReadyReplicas = %d, want 3 ready updated pods", ctx.UpdatedReadyReplicas) + } + if err := ctx.IsBatchReady(); err == nil { + t.Fatalf("IsBatchReady succeeded, want not ready error") + } +} + func TestMinReadyCalculateBatchContextNotReady(t *testing.T) { release := releaseDemo.DeepCopy() release.Status.CanaryStatus.CurrentBatch = 1 + updateRevision := util.ComputeHash(&deploymentDemo.Spec.Template, nil) + release.Status.UpdateRevision = updateRevision deployment := newMinReadyDeployment() deployment.Status.Replicas = 10 deployment.Status.UpdatedReplicas = 5 deployment.Status.ReadyReplicas = 4 - control := newBuiltMinReadyControl(t, deployment) + rs := newMinReadyReplicaSet(deployment, updateRevision, 5, 4) + control := newBuiltMinReadyControl(t, deployment, rs) ctx, err := control.CalculateBatchContext(release) if err != nil { @@ -229,12 +313,15 @@ func TestMinReadyCalculateBatchContextNotReady(t *testing.T) { func TestMinReadyCalculateBatchContextRecomputesAfterScaling(t *testing.T) { release := releaseDemo.DeepCopy() release.Status.CanaryStatus.CurrentBatch = 1 + updateRevision := util.ComputeHash(&deploymentDemo.Spec.Template, nil) + release.Status.UpdateRevision = updateRevision deployment := newMinReadyDeployment() deployment.Spec.Replicas = pointer.Int32(20) deployment.Status.Replicas = 20 deployment.Status.UpdatedReplicas = 10 deployment.Status.ReadyReplicas = 10 - control := newBuiltMinReadyControl(t, deployment) + rs := newMinReadyReplicaSet(deployment, updateRevision, 10, 10) + control := newBuiltMinReadyControl(t, deployment, rs) ctx, err := control.CalculateBatchContext(release) if err != nil { diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_test_helpers_test.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_test_helpers_test.go index d9ccd2ee..8eb20256 
100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_test_helpers_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_test_helpers_test.go @@ -18,13 +18,18 @@ package deployment import ( "context" + "fmt" "testing" apps "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + + "github.com/openkruise/rollouts/api/v1beta1" ) func newMinReadyDeployment() *apps.Deployment { @@ -33,6 +38,7 @@ func newMinReadyDeployment() *apps.Deployment { maxSurge := intstr.FromInt(1) deployment := deploymentDemo.DeepCopy() deployment.ResourceVersion = "1" + deployment.UID = types.UID("minready-deployment-uid") deployment.Spec.MinReadySeconds = 7 deployment.Spec.ProgressDeadlineSeconds = &progressDeadline deployment.Spec.Strategy.Type = apps.RollingUpdateDeploymentStrategyType @@ -113,3 +119,82 @@ func assertAnnotation(t *testing.T, annotations map[string]string, key, want str t.Fatalf("annotation %s = %q, want %q", key, got, want) } } + +func newMinReadyReplicaSet(deployment *apps.Deployment, updateRevision string, replicas, readyReplicas int32) *apps.ReplicaSet { + return &apps.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-%s", deployment.Name, updateRevision), + Namespace: deployment.Namespace, + UID: types.UID(fmt.Sprintf("rs-%s-%s", deployment.Name, updateRevision)), + Labels: map[string]string{ + apps.DefaultDeploymentUniqueLabelKey: updateRevision, + }, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: apps.SchemeGroupVersion.String(), + Kind: "Deployment", + Name: deployment.Name, + UID: deployment.UID, + Controller: pointerBool(true), + }, + }, + }, + Spec: apps.ReplicaSetSpec{ + Replicas: pointerInt32(replicas), + Selector: deployment.Spec.Selector.DeepCopy(), + Template: 
deployment.Spec.Template, + }, + Status: apps.ReplicaSetStatus{ + Replicas: replicas, + ReadyReplicas: readyReplicas, + }, + } +} + +func newMinReadyUpdatedPods(deployment *apps.Deployment, rs *apps.ReplicaSet, updateRevision, rolloutID string, total, ready int) []*corev1.Pod { + pods := make([]*corev1.Pod, 0, total) + for i := 0; i < total; i++ { + readyCondition := corev1.ConditionFalse + if i < ready { + readyCondition = corev1.ConditionTrue + } + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-pod-%d", deployment.Name, i), + Namespace: deployment.Namespace, + Labels: map[string]string{ + "app": "busybox", + apps.DefaultDeploymentUniqueLabelKey: updateRevision, + }, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: apps.SchemeGroupVersion.String(), + Kind: "ReplicaSet", + Name: rs.Name, + UID: rs.UID, + Controller: pointerBool(true), + }, + }, + }, + Status: corev1.PodStatus{ + Conditions: []corev1.PodCondition{{ + Type: corev1.PodReady, + Status: readyCondition, + }}, + }, + } + if rolloutID != "" { + pod.Labels[v1beta1.RolloutIDLabel] = rolloutID + } + pods = append(pods, pod) + } + return pods +} + +func pointerInt32(v int32) *int32 { + return &v +} + +func pointerBool(v bool) *bool { + return &v +} diff --git a/test/integration/deployment_minready_test.go b/test/integration/deployment_minready_test.go index 3d83c0e4..84013417 100644 --- a/test/integration/deployment_minready_test.go +++ b/test/integration/deployment_minready_test.go @@ -105,7 +105,7 @@ func TestDeploymentMinReadyControlPlaneRejectsCoveringPDB(t *testing.T) { assertIntegrationEvent(t, recorder, "MinReadyDegradedPDBIncompatible") } -func TestDeploymentMinReadyControlPlaneUpgradeBatchUsesReadyReplicas(t *testing.T) { +func TestDeploymentMinReadyControlPlaneUpgradeBatchUsesUpdatedReadyReplicas(t *testing.T) { _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") release := newIntegrationMinReadyRelease() 
release.Status.CanaryStatus.CurrentBatch = 1 @@ -113,8 +113,9 @@ func TestDeploymentMinReadyControlPlaneUpgradeBatchUsesReadyReplicas(t *testing. deployment.Status.Replicas = 10 deployment.Status.UpdatedReplicas = 5 deployment.Status.ReadyReplicas = 5 + rs := newIntegrationUpdatedReplicaSet(deployment, release.Status.UpdateRevision, 5, 5) recorder := record.NewFakeRecorder(20) - cli := newIntegrationClient(release, deployment) + cli := newIntegrationClient(release, deployment, rs) status := release.Status.DeepCopy() control := newIntegrationMinReadyControl(cli, recorder, release, status, deployment.Name) @@ -133,6 +134,28 @@ func TestDeploymentMinReadyControlPlaneUpgradeBatchUsesReadyReplicas(t *testing. assertIntegrationEvent(t, recorder, "MinReadyBatchReady") } +func TestDeploymentMinReadyControlPlaneWaitsForUpdatedReadyReplicas(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + release := newIntegrationMinReadyRelease() + release.Status.CanaryStatus.CurrentBatch = 1 + deployment := newInflatedIntegrationDeployment() + deployment.Status.Replicas = 10 + deployment.Status.UpdatedReplicas = 5 + deployment.Status.ReadyReplicas = 10 + rs := newIntegrationUpdatedReplicaSet(deployment, release.Status.UpdateRevision, 5, 1) + recorder := record.NewFakeRecorder(20) + cli := newIntegrationClient(release, deployment, rs) + status := release.Status.DeepCopy() + control := newIntegrationMinReadyControl(cli, recorder, release, status, deployment.Name) + + if err := control.UpgradeBatch(); err != nil { + t.Fatalf("UpgradeBatch failed: %v", err) + } + if err := control.EnsureBatchPodsReadyAndLabeled(); err == nil { + t.Fatalf("EnsureBatchPodsReadyAndLabeled succeeded, want updated ready wait error") + } +} + func TestDeploymentMinReadyControlPlaneFinalizeRestoresOriginalFields(t *testing.T) { _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") release := 
newIntegrationMinReadyRelease() diff --git a/test/integration/minready_helpers_test.go b/test/integration/minready_helpers_test.go index 6a5430b2..ede126c9 100644 --- a/test/integration/minready_helpers_test.go +++ b/test/integration/minready_helpers_test.go @@ -86,6 +86,7 @@ func newIntegrationDeployment() *apps.Deployment { Name: "demo", Namespace: "default", ResourceVersion: "1", + UID: types.UID("integration-deployment-uid"), Labels: map[string]string{"app": "demo"}, }, Spec: apps.DeploymentSpec{ @@ -149,6 +150,36 @@ func newIntegrationClient(objects ...client.Object) client.Client { Build() } +func newIntegrationUpdatedReplicaSet(deployment *apps.Deployment, updateRevision string, replicas, readyReplicas int32) *apps.ReplicaSet { + return &apps.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: deployment.Name + "-" + updateRevision, + Namespace: deployment.Namespace, + Labels: map[string]string{ + apps.DefaultDeploymentUniqueLabelKey: updateRevision, + }, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: apps.SchemeGroupVersion.String(), + Kind: "Deployment", + Name: deployment.Name, + UID: deployment.UID, + Controller: pointer.Bool(true), + }, + }, + }, + Spec: apps.ReplicaSetSpec{ + Replicas: pointer.Int32(replicas), + Selector: deployment.Spec.Selector.DeepCopy(), + Template: deployment.Spec.Template, + }, + Status: apps.ReplicaSetStatus{ + Replicas: replicas, + ReadyReplicas: readyReplicas, + }, + } +} + func newIntegrationMinReadyControl( cli client.Client, recorder record.EventRecorder, From fb501faa391f8d198eca049161839655b49ba22c Mon Sep 17 00:00:00 2001 From: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> Date: Sun, 7 Jun 2026 16:06:46 +0800 Subject: [PATCH 03/22] fix: align MinReady implementation with proposal review Signed-off-by: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> --- api/v1alpha1/batchrelease_plan_types.go | 5 - api/v1alpha1/conversion.go | 24 ++--- api/v1alpha1/rollout_conversion_test.go | 94 
---------------- api/v1alpha1/rollout_types.go | 14 --- api/v1beta1/batchrelease_plan_types.go | 5 - api/v1beta1/rollout_types.go | 14 --- .../rollouts.kruise.io_batchreleases.yaml | 16 --- .../bases/rollouts.kruise.io_rollouts.yaml | 16 --- .../batchrelease_controller_test.go | 13 +-- .../batchrelease/batchrelease_executor.go | 6 +- .../partitionstyle/deployment/control.go | 12 ++- .../deployment/minready_batch_context.go | 52 ++++----- .../deployment/minready_control.go | 89 ++++++++++++---- .../deployment/minready_control_test.go | 100 ++++++++++++++++-- .../deployment/minready_finalize_test.go | 20 +++- .../partitionstyle/deployment/minready_pdb.go | 51 --------- .../deployment/minready_test_helpers_test.go | 39 ++++++- .../control/partitionstyle/minready_status.go | 71 ++++++++----- .../partitionstyle/minready_status_test.go | 49 ++++++++- .../batchrelease/metrics/minready_metrics.go | 1 - .../metrics/minready_metrics_test.go | 4 +- pkg/controller/rollout/rollout_canary.go | 1 - .../rollout/rollout_releaseManager_test.go | 20 ---- .../mutating/workload_update_handler.go | 31 +++++- .../mutating/workload_update_handler_test.go | 18 ++-- test/e2e/deployment_minready_helpers_test.go | 1 - test/e2e/deployment_minready_pdb_test.go | 8 +- test/integration/deployment_minready_test.go | 29 ++--- test/integration/minready_helpers_test.go | 48 ++++++++- 29 files changed, 447 insertions(+), 404 deletions(-) delete mode 100644 api/v1alpha1/rollout_conversion_test.go delete mode 100644 pkg/controller/batchrelease/control/partitionstyle/deployment/minready_pdb.go diff --git a/api/v1alpha1/batchrelease_plan_types.go b/api/v1alpha1/batchrelease_plan_types.go index 4ac41599..b947050a 100644 --- a/api/v1alpha1/batchrelease_plan_types.go +++ b/api/v1alpha1/batchrelease_plan_types.go @@ -56,11 +56,6 @@ type ReleasePlan struct { PatchPodTemplateMetadata *PatchPodTemplateMetadata `json:"patchPodTemplateMetadata,omitempty"` // RollingStyle can be "Canary", "Partiton" or "BlueGreen" 
RollingStyle RollingStyleType `json:"rollingStyle,omitempty"` - // DeploymentStrategy controls how native Deployment workloads are advanced. - // Empty means Recreate for backward compatibility. - // +kubebuilder:validation:Enum=Recreate;MinReadySeconds - // +optional - DeploymentStrategy DeploymentStrategyType `json:"deploymentStrategy,omitempty"` // EnableExtraWorkloadForCanary indicates whether to create extra workload for canary // True corresponds to RollingStyle "Canary". // False corresponds to RollingStyle "Partiton". diff --git a/api/v1alpha1/conversion.go b/api/v1alpha1/conversion.go index b6a5ed5d..555c8e5c 100644 --- a/api/v1alpha1/conversion.go +++ b/api/v1alpha1/conversion.go @@ -44,8 +44,7 @@ func (src *Rollout) ConvertTo(dst conversion.Hub) error { obj.Spec.Strategy = v1beta1.RolloutStrategy{ Paused: srcSpec.Strategy.Paused, Canary: &v1beta1.CanaryStrategy{ - DeploymentStrategy: v1beta1.DeploymentStrategyType(srcSpec.Strategy.Canary.DeploymentStrategy), - FailureThreshold: srcSpec.Strategy.Canary.FailureThreshold, + FailureThreshold: srcSpec.Strategy.Canary.FailureThreshold, }, } for _, step := range srcSpec.Strategy.Canary.Steps { @@ -190,8 +189,7 @@ func (dst *Rollout) ConvertFrom(src conversion.Hub) error { Strategy: RolloutStrategy{ Paused: srcV1beta1.Spec.Strategy.Paused, Canary: &CanaryStrategy{ - DeploymentStrategy: DeploymentStrategyType(srcV1beta1.Spec.Strategy.Canary.DeploymentStrategy), - FailureThreshold: srcV1beta1.Spec.Strategy.Canary.FailureThreshold, + FailureThreshold: srcV1beta1.Spec.Strategy.Canary.FailureThreshold, }, }, Disabled: srcV1beta1.Spec.Disabled, @@ -328,11 +326,10 @@ func (src *BatchRelease) ConvertTo(dst conversion.Hub) error { Name: srcSpec.TargetRef.WorkloadRef.Name, } obj.Spec.ReleasePlan = v1beta1.ReleasePlan{ - BatchPartition: srcSpec.ReleasePlan.BatchPartition, - RolloutID: srcSpec.ReleasePlan.RolloutID, - FailureThreshold: srcSpec.ReleasePlan.FailureThreshold, - FinalizingPolicy: 
v1beta1.FinalizingPolicyType(srcSpec.ReleasePlan.FinalizingPolicy), - DeploymentStrategy: v1beta1.DeploymentStrategyType(srcSpec.ReleasePlan.DeploymentStrategy), + BatchPartition: srcSpec.ReleasePlan.BatchPartition, + RolloutID: srcSpec.ReleasePlan.RolloutID, + FailureThreshold: srcSpec.ReleasePlan.FailureThreshold, + FinalizingPolicy: v1beta1.FinalizingPolicyType(srcSpec.ReleasePlan.FinalizingPolicy), } for _, batch := range srcSpec.ReleasePlan.Batches { o := v1beta1.ReleaseBatch{ @@ -414,11 +411,10 @@ func (dst *BatchRelease) ConvertFrom(src conversion.Hub) error { Name: srcSpec.WorkloadRef.Name, } dst.Spec.ReleasePlan = ReleasePlan{ - BatchPartition: srcSpec.ReleasePlan.BatchPartition, - RolloutID: srcSpec.ReleasePlan.RolloutID, - FailureThreshold: srcSpec.ReleasePlan.FailureThreshold, - FinalizingPolicy: FinalizingPolicyType(srcSpec.ReleasePlan.FinalizingPolicy), - DeploymentStrategy: DeploymentStrategyType(srcSpec.ReleasePlan.DeploymentStrategy), + BatchPartition: srcSpec.ReleasePlan.BatchPartition, + RolloutID: srcSpec.ReleasePlan.RolloutID, + FailureThreshold: srcSpec.ReleasePlan.FailureThreshold, + FinalizingPolicy: FinalizingPolicyType(srcSpec.ReleasePlan.FinalizingPolicy), } for _, batch := range srcSpec.ReleasePlan.Batches { obj := ReleaseBatch{ diff --git a/api/v1alpha1/rollout_conversion_test.go b/api/v1alpha1/rollout_conversion_test.go deleted file mode 100644 index 213ef294..00000000 --- a/api/v1alpha1/rollout_conversion_test.go +++ /dev/null @@ -1,94 +0,0 @@ -/* -Copyright 2026 The Kruise Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -*/ - -package v1alpha1 - -import ( - "testing" - - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - "github.com/openkruise/rollouts/api/v1beta1" -) - -func TestRoundTripDeploymentStrategyFromV1alpha1(t *testing.T) { - source := &Rollout{ - ObjectMeta: metav1.ObjectMeta{ - Name: "demo", - Annotations: map[string]string{ - RolloutStyleAnnotation: string(PartitionRollingStyle), - }, - }, - Spec: RolloutSpec{ - ObjectRef: ObjectRef{WorkloadRef: &WorkloadRef{ - APIVersion: "apps/v1", - Kind: "Deployment", - Name: "demo", - }}, - Strategy: RolloutStrategy{Canary: &CanaryStrategy{ - DeploymentStrategy: DeploymentStrategyMinReadySeconds, - }}, - }, - } - - hub := &v1beta1.Rollout{} - if err := source.ConvertTo(hub); err != nil { - t.Fatalf("ConvertTo failed: %v", err) - } - if got := hub.Spec.Strategy.Canary.DeploymentStrategy; got != v1beta1.DeploymentStrategyMinReadySeconds { - t.Fatalf("ConvertTo DeploymentStrategy = %q, want %q", got, v1beta1.DeploymentStrategyMinReadySeconds) - } - - roundTripped := &Rollout{} - if err := roundTripped.ConvertFrom(hub); err != nil { - t.Fatalf("ConvertFrom failed: %v", err) - } - if got := roundTripped.Spec.Strategy.Canary.DeploymentStrategy; got != DeploymentStrategyMinReadySeconds { - t.Fatalf("round-trip DeploymentStrategy = %q, want %q", got, DeploymentStrategyMinReadySeconds) - } -} - -func TestRoundTripDeploymentStrategyFromV1beta1(t *testing.T) { - source := &v1beta1.Rollout{ - ObjectMeta: metav1.ObjectMeta{Name: "demo"}, - Spec: v1beta1.RolloutSpec{ - WorkloadRef: v1beta1.ObjectRef{ - APIVersion: "apps/v1", - Kind: "Deployment", - Name: "demo", - }, - Strategy: v1beta1.RolloutStrategy{Canary: &v1beta1.CanaryStrategy{ - DeploymentStrategy: v1beta1.DeploymentStrategyMinReadySeconds, - }}, - }, - } - - spoke := &Rollout{} - if err := spoke.ConvertFrom(source); err != nil { - t.Fatalf("ConvertFrom failed: %v", err) - } - if got := 
spoke.Spec.Strategy.Canary.DeploymentStrategy; got != DeploymentStrategyMinReadySeconds { - t.Fatalf("ConvertFrom DeploymentStrategy = %q, want %q", got, DeploymentStrategyMinReadySeconds) - } - - roundTripped := &v1beta1.Rollout{} - if err := spoke.ConvertTo(roundTripped); err != nil { - t.Fatalf("ConvertTo failed: %v", err) - } - if got := roundTripped.Spec.Strategy.Canary.DeploymentStrategy; got != v1beta1.DeploymentStrategyMinReadySeconds { - t.Fatalf("round-trip DeploymentStrategy = %q, want %q", got, v1beta1.DeploymentStrategyMinReadySeconds) - } -} diff --git a/api/v1alpha1/rollout_types.go b/api/v1alpha1/rollout_types.go index 4939164b..f30ba7d4 100644 --- a/api/v1alpha1/rollout_types.go +++ b/api/v1alpha1/rollout_types.go @@ -105,11 +105,6 @@ type RolloutStrategy struct { // CanaryStrategy defines parameters for a Replica Based Canary type CanaryStrategy struct { - // DeploymentStrategy controls how native Deployment workloads are advanced. - // Empty means Recreate for backward compatibility. - // +kubebuilder:validation:Enum=Recreate;MinReadySeconds - // +optional - DeploymentStrategy DeploymentStrategyType `json:"deploymentStrategy,omitempty"` // Steps define the order of phases to execute release in batches(20%, 40%, 60%, 80%, 100%) // +optional Steps []CanaryStep `json:"steps,omitempty"` @@ -130,15 +125,6 @@ type CanaryStrategy struct { DisableGenerateCanaryService bool `json:"disableGenerateCanaryService,omitempty"` } -type DeploymentStrategyType string - -const ( - // DeploymentStrategyRecreate keeps the existing Recreate-based Deployment rollout behavior. - DeploymentStrategyRecreate DeploymentStrategyType = "Recreate" - // DeploymentStrategyMinReadySeconds uses native RollingUpdate with inflated minReadySeconds. 
- DeploymentStrategyMinReadySeconds DeploymentStrategyType = "MinReadySeconds" -) - type PatchPodTemplateMetadata struct { // annotations Annotations map[string]string `json:"annotations,omitempty"` diff --git a/api/v1beta1/batchrelease_plan_types.go b/api/v1beta1/batchrelease_plan_types.go index 250245c1..ddc428a3 100644 --- a/api/v1beta1/batchrelease_plan_types.go +++ b/api/v1beta1/batchrelease_plan_types.go @@ -56,11 +56,6 @@ type ReleasePlan struct { PatchPodTemplateMetadata *PatchPodTemplateMetadata `json:"patchPodTemplateMetadata,omitempty"` // RollingStyle can be "Canary", "Partiton" or "BlueGreen" RollingStyle RollingStyleType `json:"rollingStyle,omitempty"` - // DeploymentStrategy controls how native Deployment workloads are advanced. - // Empty means Recreate for backward compatibility. - // +kubebuilder:validation:Enum=Recreate;MinReadySeconds - // +optional - DeploymentStrategy DeploymentStrategyType `json:"deploymentStrategy,omitempty"` // EnableExtraWorkloadForCanary indicates whether to create extra workload for canary // True corresponds to RollingStyle "Canary". // False corresponds to RollingStyle "Partiton". diff --git a/api/v1beta1/rollout_types.go b/api/v1beta1/rollout_types.go index 00d21d42..50ba2a40 100644 --- a/api/v1beta1/rollout_types.go +++ b/api/v1beta1/rollout_types.go @@ -192,11 +192,6 @@ type BlueGreenStrategy struct { // CanaryStrategy defines parameters for a Replica Based Canary type CanaryStrategy struct { - // DeploymentStrategy controls how native Deployment workloads are advanced. - // Empty means Recreate for backward compatibility. 
- // +kubebuilder:validation:Enum=Recreate;MinReadySeconds - // +optional - DeploymentStrategy DeploymentStrategyType `json:"deploymentStrategy,omitempty"` // Steps define the order of phases to execute release in batches(20%, 40%, 60%, 80%, 100%) // +kubebuilder:validation:MaxItems=50 // +optional @@ -224,15 +219,6 @@ type CanaryStrategy struct { DisableGenerateCanaryService bool `json:"disableGenerateCanaryService,omitempty"` } -type DeploymentStrategyType string - -const ( - // DeploymentStrategyRecreate keeps the existing Recreate-based Deployment rollout behavior. - DeploymentStrategyRecreate DeploymentStrategyType = "Recreate" - // DeploymentStrategyMinReadySeconds uses native RollingUpdate with inflated minReadySeconds. - DeploymentStrategyMinReadySeconds DeploymentStrategyType = "MinReadySeconds" -) - type PatchPodTemplateMetadata struct { // annotations Annotations map[string]string `json:"annotations,omitempty"` diff --git a/config/crd/bases/rollouts.kruise.io_batchreleases.yaml b/config/crd/bases/rollouts.kruise.io_batchreleases.yaml index a826d67a..fef0369e 100644 --- a/config/crd/bases/rollouts.kruise.io_batchreleases.yaml +++ b/config/crd/bases/rollouts.kruise.io_batchreleases.yaml @@ -93,14 +93,6 @@ spec: - canaryReplicas type: object type: array - deploymentStrategy: - description: |- - DeploymentStrategy controls how native Deployment workloads are advanced. - Empty means Recreate for backward compatibility. - enum: - - Recreate - - MinReadySeconds - type: string enableExtraWorkloadForCanary: description: |- EnableExtraWorkloadForCanary indicates whether to create extra workload for canary @@ -385,14 +377,6 @@ spec: - canaryReplicas type: object type: array - deploymentStrategy: - description: |- - DeploymentStrategy controls how native Deployment workloads are advanced. - Empty means Recreate for backward compatibility. 
- enum: - - Recreate - - MinReadySeconds - type: string enableExtraWorkloadForCanary: description: |- EnableExtraWorkloadForCanary indicates whether to create extra workload for canary diff --git a/config/crd/bases/rollouts.kruise.io_rollouts.yaml b/config/crd/bases/rollouts.kruise.io_rollouts.yaml index b20097b7..d0dd7d03 100644 --- a/config/crd/bases/rollouts.kruise.io_rollouts.yaml +++ b/config/crd/bases/rollouts.kruise.io_rollouts.yaml @@ -104,14 +104,6 @@ spec: description: CanaryStrategy defines parameters for a Replica Based Canary properties: - deploymentStrategy: - description: |- - DeploymentStrategy controls how native Deployment workloads are advanced. - Empty means Recreate for backward compatibility. - enum: - - Recreate - - MinReadySeconds - type: string disableGenerateCanaryService: description: canary service will not be generated if DisableGenerateCanaryService is true @@ -1190,14 +1182,6 @@ spec: description: CanaryStrategy defines parameters for a Replica Based Canary properties: - deploymentStrategy: - description: |- - DeploymentStrategy controls how native Deployment workloads are advanced. - Empty means Recreate for backward compatibility. 
- enum: - - Recreate - - MinReadySeconds - type: string disableGenerateCanaryService: description: canary service will not be generated if DisableGenerateCanaryService is true diff --git a/pkg/controller/batchrelease/batchrelease_controller_test.go b/pkg/controller/batchrelease/batchrelease_controller_test.go index 63e7619a..87db3cb4 100644 --- a/pkg/controller/batchrelease/batchrelease_controller_test.go +++ b/pkg/controller/batchrelease/batchrelease_controller_test.go @@ -833,7 +833,6 @@ func TestExecutorFallsBackToRecreateWhenMinReadyFeatureGateDisabled(t *testing.T _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=false") release := releaseDeploy.DeepCopy() release.Spec.ReleasePlan.RollingStyle = v1beta1.PartitionRollingStyle - release.Spec.ReleasePlan.DeploymentStrategy = v1beta1.DeploymentStrategyMinReadySeconds release.Status.Phase = v1beta1.RolloutPhasePreparing deployment := stableDeploy.DeepCopy() rec := record.NewFakeRecorder(100) @@ -880,11 +879,10 @@ func TestMinReadyControlPlaneRecordsInitializedConditionAndEvent(t *testing.T) { } assertCondition(t, status, v1beta1.RolloutConditionMinReadyInitialized, corev1.ConditionTrue, "MinReadyInitialized") - assertCondition(t, status, v1beta1.RolloutConditionMinReadyDegraded, corev1.ConditionFalse, "MinReadyHealthy") assertRecordedEvent(t, rec, "MinReadyInitialized") } -func TestMinReadyControlPlaneRecordsDegradedForPDB(t *testing.T) { +func TestMinReadyControlPlaneAllowsPDBCoexistence(t *testing.T) { _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") release := minReadyRelease() deployment := stableDeploy.DeepCopy() @@ -907,13 +905,11 @@ func TestMinReadyControlPlaneRecordsDegradedForPDB(t *testing.T) { t.Fatalf("getReleaseController failed: %v", err) } - err = controller.Initialize() - if err == nil || !strings.Contains(err.Error(), "MinReadyDegradedPDBIncompatible") { - t.Fatalf("Initialize error = %v, want PDB degraded", 
err) + if err := controller.Initialize(); err != nil { + t.Fatalf("Initialize failed: %v", err) } - assertCondition(t, status, v1beta1.RolloutConditionMinReadyDegraded, corev1.ConditionTrue, "MinReadyDegradedPDBIncompatible") - assertRecordedEvent(t, rec, "MinReadyDegradedPDBIncompatible") + assertCondition(t, status, v1beta1.RolloutConditionMinReadyInitialized, corev1.ConditionTrue, "MinReadyInitialized") } func BenchmarkRecreateReconcile(b *testing.B) { @@ -965,7 +961,6 @@ func BenchmarkMinReadyReconcile(b *testing.B) { func minReadyRelease() *v1beta1.BatchRelease { release := releaseDeploy.DeepCopy() release.Spec.ReleasePlan.RollingStyle = v1beta1.PartitionRollingStyle - release.Spec.ReleasePlan.DeploymentStrategy = v1beta1.DeploymentStrategyMinReadySeconds release.Status.Phase = v1beta1.RolloutPhasePreparing return release } diff --git a/pkg/controller/batchrelease/batchrelease_executor.go b/pkg/controller/batchrelease/batchrelease_executor.go index b62627f5..0f20dacd 100644 --- a/pkg/controller/batchrelease/batchrelease_executor.go +++ b/pkg/controller/batchrelease/batchrelease_executor.go @@ -246,14 +246,10 @@ func (r *Executor) getReleaseController(release *v1beta1.BatchRelease, newStatus return partitionstyle.NewControlPlane(cloneset.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil } if targetRef.APIVersion == apps.SchemeGroupVersion.String() && targetRef.Kind == reflect.TypeOf(apps.Deployment{}).Name() { - if release.Spec.ReleasePlan.DeploymentStrategy == v1beta1.DeploymentStrategyMinReadySeconds && - utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) { + if utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) { klog.InfoS("Using Deployment MinReadySeconds partition-style release controller for this batch release", "workload name", targetKey.Name, "namespace", targetKey.Namespace) return partitionstyle.NewControlPlane(partitiondeployment.NewMinReadyController, r.client, r.recorder, 
release, newStatus, targetKey, gvk), nil } - if release.Spec.ReleasePlan.DeploymentStrategy == v1beta1.DeploymentStrategyMinReadySeconds { - klog.InfoS("MinReadySecondsStrategy feature gate disabled, using Recreate deployment controller", "workload name", targetKey.Name, "namespace", targetKey.Namespace) - } klog.InfoS("Using Deployment partition-style release controller for this batch release", "workload name", targetKey.Name, "namespace", targetKey.Namespace) return partitionstyle.NewControlPlane(partitiondeployment.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil } diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/control.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/control.go index d34a1d51..4623e6bd 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/control.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/control.go @@ -49,16 +49,20 @@ type realController struct { } func NewController(cli client.Client, key types.NamespacedName, _ schema.GroupVersionKind) partitionstyle.Interface { + return newRealController(cli, key) +} + +func NewMinReadyController(cli client.Client, key types.NamespacedName, _ schema.GroupVersionKind) partitionstyle.Interface { + return &MinReadyControl{realController: newRealController(cli, key)} +} + +func newRealController(cli client.Client, key types.NamespacedName) *realController { return &realController{ key: key, client: cli, } } -func NewMinReadyController(cli client.Client, key types.NamespacedName, gvk schema.GroupVersionKind) partitionstyle.Interface { - return &MinReadyControl{realController: NewController(cli, key, gvk).(*realController)} -} - func (rc *realController) GetWorkloadInfo() *util.WorkloadInfo { return rc.WorkloadInfo } diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_batch_context.go 
b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_batch_context.go index 23207823..3d60503d 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_batch_context.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_batch_context.go @@ -17,59 +17,49 @@ limitations under the License. package deployment import ( - "context" "fmt" + "time" apps "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" - "sigs.k8s.io/controller-runtime/pkg/client" "github.com/openkruise/rollouts/pkg/util" ) func (mc *MinReadyControl) minReadyUpdatedReadyReplicas(updateRevision string) (int32, error) { - if len(mc.pods) > 0 { - return countUpdatedReadyPods(mc.pods, updateRevision), nil + original, err := parseOriginalDeploymentStrategy(mc.object.Annotations) + if err != nil { + return 0, err + } + if _, err := mc.ListOwnedPods(); err != nil { + return 0, err } - return mc.updatedReadyReplicasFromReplicaSet(updateRevision) + return countUpdatedAvailablePods(mc.pods, updateRevision, originalMinReadySeconds(original), time.Now()), nil } -func countUpdatedReadyPods(pods []*corev1.Pod, updateRevision string) int32 { +func countUpdatedAvailablePods(pods []*corev1.Pod, updateRevision string, minReadySeconds int32, now time.Time) int32 { return int32(util.WrappedPodCount(pods, func(pod *corev1.Pod) bool { if !pod.DeletionTimestamp.IsZero() { return false } - return util.IsConsistentWithRevision(pod.Labels, updateRevision) && util.IsPodReady(pod) - })) -} - -func (mc *MinReadyControl) updatedReadyReplicasFromReplicaSet(updateRevision string) (int32, error) { - rsList := &apps.ReplicaSetList{} - if err := mc.client.List(context.TODO(), rsList, client.InNamespace(mc.object.Namespace)); err != nil { - return 0, fmt.Errorf("list ReplicaSets: %w", err) - } - - var ready int32 - for i := range rsList.Items { - rs := &rsList.Items[i] - if 
!metav1.IsControlledBy(rs, mc.object) { - continue + if !util.IsConsistentWithRevision(pod.Labels, updateRevision) { + return false } - if !replicaSetMatchesUpdateRevision(rs, updateRevision) { - continue + ready := util.GetPodReadyCondition(pod.Status) + if ready == nil || ready.Status != corev1.ConditionTrue { + return false } - ready += rs.Status.ReadyReplicas - } - return ready, nil + return ready.LastTransitionTime.Add(time.Duration(minReadySeconds)*time.Second).Before(now) || + ready.LastTransitionTime.Add(time.Duration(minReadySeconds)*time.Second).Equal(now) + })) } -func replicaSetMatchesUpdateRevision(rs *apps.ReplicaSet, updateRevision string) bool { - if util.ComputeHash(&rs.Spec.Template, nil) == updateRevision { - return true +func originalMinReadySeconds(original *originalDeploymentStrategy) int32 { + if original.minReadySeconds == nil { + return 0 } - return util.IsConsistentWithRevision(rs.Labels, updateRevision) + return *original.minReadySeconds } func minReadyDesiredUpdatedReplicas(desired intstr.IntOrString, deployment *apps.Deployment) (int32, error) { diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go index c956fb05..a0e650a0 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go @@ -43,7 +43,11 @@ func (mc *MinReadyControl) BuildController() (partitionstyle.Interface, error) { if err != nil { return nil, err } - return &MinReadyControl{realController: built.(*realController)}, nil + rc, ok := built.(*realController) + if !ok { + return nil, fmt.Errorf("MinReadyControl.BuildController: expected *realController, got %T", built) + } + return &MinReadyControl{realController: rc}, nil } func (mc *MinReadyControl) Initialize(_ *v1beta1.BatchRelease) error { @@ -56,7 +60,7 @@ func (mc 
*MinReadyControl) Initialize(_ *v1beta1.BatchRelease) error { return fmt.Errorf("MinReadyControl.Initialize: %w", err) } if hasAnyOriginalAnnotation(original.Annotations) { - if err := ensureInflatedDeploymentStrategy(original); err != nil { + if err := validateInflatedDeploymentStrategy(original); err != nil { return fmt.Errorf("MinReadyControl.Initialize: %w", err) } } @@ -66,10 +70,7 @@ func (mc *MinReadyControl) Initialize(_ *v1beta1.BatchRelease) error { } func (mc *MinReadyControl) UpgradeBatch(ctx *batchcontext.BatchContext) error { - if mc.object.Spec.Strategy.RollingUpdate == nil { - return fmt.Errorf("MinReadyControl.UpgradeBatch[%d]: rollingUpdate is nil", ctx.CurrentBatch) - } - if err := ensureInflatedDeploymentStrategy(mc.object); err != nil { + if err := mc.ensureInflatedDeploymentStrategy(); err != nil { return fmt.Errorf("MinReadyControl.UpgradeBatch[%d]: %w", ctx.CurrentBatch, err) } current, err := intstr.GetScaledValueFromIntOrPercent( @@ -98,6 +99,9 @@ func (mc *MinReadyControl) Finalize(_ *v1beta1.BatchRelease) error { return nil } if !hasAnyOriginalAnnotation(mc.object.Annotations) { + if hasInflatedDeploymentFields(mc.object) { + return fmt.Errorf("MinReadyControl.Finalize: annotation state missing while deployment fields are still inflated") + } return nil } original := mc.object.DeepCopy() @@ -154,13 +158,9 @@ func (mc *MinReadyControl) ensureInitializeAllowed() error { if !utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) { return fmt.Errorf("%s feature gate is disabled", feature.MinReadySecondsStrategy) } - covered, err := mc.hasPDBCoveringDeployment() - if err != nil { + if err := validateDeploymentStrategyType(mc.object); err != nil { return err } - if covered { - return fmt.Errorf("%s: PDB detected", EventDegradedPDBIncompatible) - } return nil } @@ -169,7 +169,8 @@ func writeOriginalAnnotations(original, modified *apps.Deployment) error { modified.Annotations = map[string]string{} } if 
hasAnyOriginalAnnotation(original.Annotations) { - return ensureAllOriginalAnnotations(original.Annotations) + _, err := parseOriginalDeploymentStrategy(original.Annotations) + return err } modified.Annotations[AnnotationOriginalMinReadySeconds] = serializeOriginalInt32(&original.Spec.MinReadySeconds) modified.Annotations[AnnotationOriginalProgressDeadlineSeconds] = serializeOriginalInt32(original.Spec.ProgressDeadlineSeconds) @@ -180,8 +181,8 @@ func writeOriginalAnnotations(original, modified *apps.Deployment) error { func ensureAllOriginalAnnotations(annotations map[string]string) error { for _, key := range AllOriginalAnnotations { - if _, ok := annotations[key]; !ok { - return fmt.Errorf("annotation %s missing", key) + if _, err := readOriginalAnnotation(annotations, key); err != nil { + return err } } return nil @@ -204,17 +205,37 @@ func originalMaxSurge(deployment *apps.Deployment) *intstr.IntOrString { func inflateDeploymentStrategy(deployment *apps.Deployment) { progressDeadlineSeconds := InflatedProgressDeadlineSeconds maxUnavailable := intstr.FromInt(0) - maxSurge := intstr.FromInt(int(InflatedMaxSurgeInt)) deployment.Spec.MinReadySeconds = InflatedMinReadySeconds deployment.Spec.ProgressDeadlineSeconds = &progressDeadlineSeconds if deployment.Spec.Strategy.RollingUpdate == nil { deployment.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{} } deployment.Spec.Strategy.RollingUpdate.MaxUnavailable = &maxUnavailable - deployment.Spec.Strategy.RollingUpdate.MaxSurge = &maxSurge + applyMaxSurgeValidationFallback(deployment) +} + +func (mc *MinReadyControl) ensureInflatedDeploymentStrategy() error { + if err := validateDeploymentStrategyType(mc.object); err != nil { + return err + } + if validateInflatedDeploymentStrategy(mc.object) == nil { + return nil + } + original := mc.object.DeepCopy() + modified := original.DeepCopy() + inflateDeploymentStrategy(modified) + patch := client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{}) 
+ if err := mc.client.Patch(context.TODO(), modified, patch); err != nil { + return err + } + mc.object = modified + return nil } -func ensureInflatedDeploymentStrategy(deployment *apps.Deployment) error { +func validateInflatedDeploymentStrategy(deployment *apps.Deployment) error { + if err := validateDeploymentStrategyType(deployment); err != nil { + return err + } if deployment.Spec.MinReadySeconds != InflatedMinReadySeconds { return fmt.Errorf("%s: minReadySeconds=%d want %d", EventDegradedDriftDetected, deployment.Spec.MinReadySeconds, InflatedMinReadySeconds) @@ -226,12 +247,41 @@ func ensureInflatedDeploymentStrategy(deployment *apps.Deployment) error { if deployment.Spec.Strategy.RollingUpdate == nil { return fmt.Errorf("%s: rollingUpdate is nil", EventDegradedDriftDetected) } - if maxSurge := deployment.Spec.Strategy.RollingUpdate.MaxSurge; maxSurge == nil || maxSurge.Type != intstr.Int || maxSurge.IntVal != InflatedMaxSurgeInt { - return fmt.Errorf("%s: maxSurge=%v want %d", EventDegradedDriftDetected, maxSurge, InflatedMaxSurgeInt) + return nil +} + +func validateDeploymentStrategyType(deployment *apps.Deployment) error { + if deployment.Spec.Strategy.Type != apps.RollingUpdateDeploymentStrategyType { + return fmt.Errorf("%s: deployment strategy type %s is not RollingUpdate", + EventDegradedDriftDetected, deployment.Spec.Strategy.Type) } return nil } +func hasInflatedDeploymentFields(deployment *apps.Deployment) bool { + if deployment.Spec.MinReadySeconds == InflatedMinReadySeconds { + return true + } + return deployment.Spec.ProgressDeadlineSeconds != nil && + *deployment.Spec.ProgressDeadlineSeconds == InflatedProgressDeadlineSeconds +} + +func applyMaxSurgeValidationFallback(deployment *apps.Deployment) { + if deployment.Spec.Strategy.RollingUpdate.MaxSurge == nil { + return + } + replicas := int32(1) + if deployment.Spec.Replicas != nil && *deployment.Spec.Replicas > 0 { + replicas = *deployment.Spec.Replicas + } + surge, err := 
intstr.GetScaledValueFromIntOrPercent(deployment.Spec.Strategy.RollingUpdate.MaxSurge, int(replicas), true) + if err != nil || surge > 0 { + return + } + maxSurge := intstr.FromInt(1) + deployment.Spec.Strategy.RollingUpdate.MaxSurge = &maxSurge +} + type originalDeploymentStrategy struct { minReadySeconds *int32 progressDeadlineSeconds *int32 @@ -284,7 +334,6 @@ func applyOriginalDeploymentStrategy(deployment *apps.Deployment, original *orig deployment.Spec.Strategy.RollingUpdate.MaxSurge = original.maxSurge } -const EventDegradedPDBIncompatible = "MinReadyDegradedPDBIncompatible" const EventDegradedDriftDetected = "MinReadyDegradedDriftDetected" var _ partitionstyle.Interface = (*MinReadyControl)(nil) diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go index 3db89d99..c0661c4d 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go @@ -108,6 +108,23 @@ func TestMinReadyInitializeRejectsPartialOriginalAnnotations(t *testing.T) { } } +func TestMinReadyInitializeRejectsEmptyOriginalAnnotations(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newMinReadyDeployment() + deployment.Annotations = map[string]string{ + AnnotationOriginalMinReadySeconds: "", + AnnotationOriginalProgressDeadlineSeconds: "30", + AnnotationOriginalMaxUnavailable: "10%", + AnnotationOriginalMaxSurge: "2", + } + control := newBuiltMinReadyControl(t, deployment) + + err := control.Initialize(releaseDemo.DeepCopy()) + if err == nil || !strings.Contains(err.Error(), "present but empty") { + t.Fatalf("Initialize error = %v, want empty annotation error", err) + } +} + func TestMinReadyInitializeSerializesKubernetesDefaults(t *testing.T) { _ = 
utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") deployment := newMinReadyDeployment() @@ -123,7 +140,7 @@ func TestMinReadyInitializeSerializesKubernetesDefaults(t *testing.T) { assertAnnotation(t, got.Annotations, AnnotationOriginalProgressDeadlineSeconds, AnnotationValueKubernetesDefault) assertAnnotation(t, got.Annotations, AnnotationOriginalMaxUnavailable, AnnotationValueKubernetesDefault) assertAnnotation(t, got.Annotations, AnnotationOriginalMaxSurge, AnnotationValueKubernetesDefault) - assertMinReadyInflated(t, got) + assertMinReadyInflatedWithoutSurgeRequirement(t, got) } func TestMinReadyInitializeRejectsFeatureGateDisabled(t *testing.T) { @@ -136,7 +153,7 @@ func TestMinReadyInitializeRejectsFeatureGateDisabled(t *testing.T) { } } -func TestMinReadyInitializeRejectsCoveringPDB(t *testing.T) { +func TestMinReadyInitializeAllowsCoveringPDB(t *testing.T) { _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") deployment := newMinReadyDeployment() pdb := &policyv1.PodDisruptionBudget{ @@ -147,9 +164,8 @@ func TestMinReadyInitializeRejectsCoveringPDB(t *testing.T) { } control := newBuiltMinReadyControl(t, deployment, pdb) - err := control.Initialize(releaseDemo.DeepCopy()) - if err == nil || !strings.Contains(err.Error(), EventDegradedPDBIncompatible) { - t.Fatalf("Initialize error = %v, want PDB incompatible", err) + if err := control.Initialize(releaseDemo.DeepCopy()); err != nil { + t.Fatalf("Initialize failed: %v", err) } } @@ -183,6 +199,57 @@ func TestMinReadyUpgradeBatchUpdatesMaxUnavailableOnly(t *testing.T) { } } +func TestMinReadyUpgradeBatchRejectsStrategyTypeDrift(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newInflatedMinReadyDeployment() + addMinReadyOriginalAnnotations(deployment) + deployment.Spec.Strategy.Type = apps.RecreateDeploymentStrategyType + control := 
newBuiltMinReadyControl(t, deployment) + ctx := &batchcontext.BatchContext{ + CurrentBatch: 1, + Replicas: 10, + DesiredUpdatedReplicas: 5, + } + + err := control.UpgradeBatch(ctx) + if err == nil || !strings.Contains(err.Error(), EventDegradedDriftDetected) { + t.Fatalf("UpgradeBatch error = %v, want strategy type drift detected", err) + } +} + +func TestMinReadyUpgradeBatchRestoresInflatedStrategyFields(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newInflatedMinReadyDeployment() + addMinReadyOriginalAnnotations(deployment) + deployment.Spec.MinReadySeconds = 7 + deployment.Spec.ProgressDeadlineSeconds = pointer.Int32(60) + deployment.Spec.Strategy.RollingUpdate = nil + control := newBuiltMinReadyControl(t, deployment) + ctx := &batchcontext.BatchContext{ + CurrentBatch: 1, + Replicas: 10, + DesiredUpdatedReplicas: 5, + } + + if err := control.UpgradeBatch(ctx); err != nil { + t.Fatalf("UpgradeBatch failed: %v", err) + } + + got := fetchMinReadyDeployment(t, control) + if got.Spec.MinReadySeconds != InflatedMinReadySeconds { + t.Fatalf("minReadySeconds = %d, want %d", got.Spec.MinReadySeconds, InflatedMinReadySeconds) + } + if got.Spec.ProgressDeadlineSeconds == nil || *got.Spec.ProgressDeadlineSeconds != InflatedProgressDeadlineSeconds { + t.Fatalf("progressDeadlineSeconds = %v, want %d", got.Spec.ProgressDeadlineSeconds, InflatedProgressDeadlineSeconds) + } + if got.Spec.Strategy.RollingUpdate == nil { + t.Fatalf("rollingUpdate is nil, want restored strategy") + } + if unavailable := got.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.IntVal != 5 { + t.Fatalf("maxUnavailable = %v, want 5", unavailable) + } +} + func TestMinReadyCalculateBatchContextUsesUpdatedReadyReplicas(t *testing.T) { release := releaseDemo.DeepCopy() release.Status.CanaryStatus.CurrentBatch = 1 @@ -192,8 +259,10 @@ func 
TestMinReadyCalculateBatchContextUsesUpdatedReadyReplicas(t *testing.T) { deployment.Status.Replicas = 10 deployment.Status.UpdatedReplicas = 5 deployment.Status.ReadyReplicas = 5 + addMinReadyOriginalAnnotations(deployment) rs := newMinReadyReplicaSet(deployment, updateRevision, 5, 5) - control := newBuiltMinReadyControl(t, deployment, rs) + pods := newMinReadyUpdatedPods(deployment, rs, updateRevision, "", 5, 5) + control := newBuiltMinReadyControl(t, deployment, appendPodObjects([]interface{}{rs}, pods)...) ctx, err := control.CalculateBatchContext(release) if err != nil { @@ -204,7 +273,7 @@ func TestMinReadyCalculateBatchContextUsesUpdatedReadyReplicas(t *testing.T) { t.Fatalf("desired/planned = %d/%d, want 5/5", ctx.DesiredUpdatedReplicas, ctx.PlannedUpdatedReplicas) } if ctx.UpdatedReadyReplicas != 5 { - t.Fatalf("UpdatedReadyReplicas = %d, want updated RS ready 5", ctx.UpdatedReadyReplicas) + t.Fatalf("UpdatedReadyReplicas = %d, want updated available pods 5", ctx.UpdatedReadyReplicas) } if err := ctx.IsBatchReady(); err != nil { t.Fatalf("IsBatchReady failed: %v", err) @@ -220,15 +289,17 @@ func TestMinReadyCalculateBatchContextIgnoresOldReadyPods(t *testing.T) { deployment.Status.Replicas = 10 deployment.Status.UpdatedReplicas = 5 deployment.Status.ReadyReplicas = 10 + addMinReadyOriginalAnnotations(deployment) rs := newMinReadyReplicaSet(deployment, updateRevision, 5, 1) - control := newBuiltMinReadyControl(t, deployment, rs) + pods := newMinReadyUpdatedPods(deployment, rs, updateRevision, "", 5, 1) + control := newBuiltMinReadyControl(t, deployment, appendPodObjects([]interface{}{rs}, pods)...) 
ctx, err := control.CalculateBatchContext(release) if err != nil { t.Fatalf("CalculateBatchContext failed: %v", err) } if ctx.UpdatedReadyReplicas != 1 { - t.Fatalf("UpdatedReadyReplicas = %d, want 1 from updated RS only", ctx.UpdatedReadyReplicas) + t.Fatalf("UpdatedReadyReplicas = %d, want 1 from updated available pods only", ctx.UpdatedReadyReplicas) } if err := ctx.IsBatchReady(); err == nil { t.Fatalf("IsBatchReady succeeded, want not ready error") @@ -245,6 +316,7 @@ func TestMinReadyCalculateBatchContextRequiresPodListingForRolloutID(t *testing. deployment.Status.Replicas = 10 deployment.Status.UpdatedReplicas = 5 deployment.Status.ReadyReplicas = 5 + addMinReadyOriginalAnnotations(deployment) rs := newMinReadyReplicaSet(deployment, updateRevision, 5, 5) control := newBuiltMinReadyControl(t, deployment, rs) @@ -270,6 +342,7 @@ func TestMinReadyCalculateBatchContextCountsReadyPodsWhenListed(t *testing.T) { deployment.Status.Replicas = 10 deployment.Status.UpdatedReplicas = 5 deployment.Status.ReadyReplicas = 10 + addMinReadyOriginalAnnotations(deployment) rs := newMinReadyReplicaSet(deployment, updateRevision, 5, 1) pods := newMinReadyUpdatedPods(deployment, rs, updateRevision, "rollout-1", 5, 3) control := newBuiltMinReadyControl(t, deployment, rs, pods[0], pods[1], pods[2], pods[3], pods[4]) @@ -298,8 +371,10 @@ func TestMinReadyCalculateBatchContextNotReady(t *testing.T) { deployment.Status.Replicas = 10 deployment.Status.UpdatedReplicas = 5 deployment.Status.ReadyReplicas = 4 + addMinReadyOriginalAnnotations(deployment) rs := newMinReadyReplicaSet(deployment, updateRevision, 5, 4) - control := newBuiltMinReadyControl(t, deployment, rs) + pods := newMinReadyUpdatedPods(deployment, rs, updateRevision, "", 5, 4) + control := newBuiltMinReadyControl(t, deployment, appendPodObjects([]interface{}{rs}, pods)...) 
ctx, err := control.CalculateBatchContext(release) if err != nil { @@ -320,8 +395,10 @@ func TestMinReadyCalculateBatchContextRecomputesAfterScaling(t *testing.T) { deployment.Status.Replicas = 20 deployment.Status.UpdatedReplicas = 10 deployment.Status.ReadyReplicas = 10 + addMinReadyOriginalAnnotations(deployment) rs := newMinReadyReplicaSet(deployment, updateRevision, 10, 10) - control := newBuiltMinReadyControl(t, deployment, rs) + pods := newMinReadyUpdatedPods(deployment, rs, updateRevision, "", 10, 10) + control := newBuiltMinReadyControl(t, deployment, appendPodObjects([]interface{}{rs}, pods)...) ctx, err := control.CalculateBatchContext(release) if err != nil { @@ -338,6 +415,7 @@ func TestMinReadyCalculateBatchContextReplicasZero(t *testing.T) { deployment := newMinReadyDeployment() deployment.Spec.Replicas = pointer.Int32(0) deployment.Status.Replicas = 0 + addMinReadyOriginalAnnotations(deployment) control := newBuiltMinReadyControl(t, deployment) ctx, err := control.CalculateBatchContext(release) diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_finalize_test.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_finalize_test.go index 65125d03..cab86a88 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_finalize_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_finalize_test.go @@ -81,8 +81,8 @@ func TestMinReadyFinalizeRestoresKubernetesDefaults(t *testing.T) { } } -func TestMinReadyFinalizeNoopWhenAnnotationsAbsent(t *testing.T) { - deployment := newInflatedMinReadyDeployment() +func TestMinReadyFinalizeNoopWhenAnnotationsAbsentAndFieldsRestored(t *testing.T) { + deployment := newMinReadyDeployment() deployment.Annotations = nil control := newBuiltMinReadyControl(t, deployment) @@ -90,6 +90,22 @@ func TestMinReadyFinalizeNoopWhenAnnotationsAbsent(t *testing.T) { t.Fatalf("Finalize failed: %v", err) } + got := 
fetchMinReadyDeployment(t, control) + if got.Spec.MinReadySeconds != 7 { + t.Fatalf("minReadySeconds = %d, want original value 7", got.Spec.MinReadySeconds) + } +} + +func TestMinReadyFinalizeRejectsMissingAnnotationsWhileFieldsInflated(t *testing.T) { + deployment := newInflatedMinReadyDeployment() + deployment.Annotations = nil + control := newBuiltMinReadyControl(t, deployment) + + err := control.Finalize(releaseDemo.DeepCopy()) + if err == nil || !strings.Contains(err.Error(), "annotation state missing") { + t.Fatalf("Finalize error = %v, want missing annotation state error", err) + } + got := fetchMinReadyDeployment(t, control) assertMinReadyInflated(t, got) } diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_pdb.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_pdb.go deleted file mode 100644 index 026e08b3..00000000 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_pdb.go +++ /dev/null @@ -1,51 +0,0 @@ -/* -Copyright 2026 The Kruise Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package deployment - -import ( - "context" - "fmt" - - policyv1 "k8s.io/api/policy/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" - "sigs.k8s.io/controller-runtime/pkg/client" -) - -func (mc *MinReadyControl) hasPDBCoveringDeployment() (bool, error) { - pdbList := &policyv1.PodDisruptionBudgetList{} - if err := mc.client.List(context.TODO(), pdbList, client.InNamespace(mc.object.Namespace)); err != nil { - return false, fmt.Errorf("%s: list PDBs: %w", EventDegradedPDBIncompatible, err) - } - templateLabels := labels.Set(mc.object.Spec.Template.Labels) - for i := range pdbList.Items { - covered, err := pdbCoversLabels(&pdbList.Items[i], templateLabels) - if err != nil || covered { - return covered, err - } - } - return false, nil -} - -func pdbCoversLabels(pdb *policyv1.PodDisruptionBudget, templateLabels labels.Set) (bool, error) { - selector, err := metav1.LabelSelectorAsSelector(pdb.Spec.Selector) - if err != nil { - return false, fmt.Errorf("%s: invalid PDB selector %s/%s: %w", - EventDegradedPDBIncompatible, pdb.Namespace, pdb.Name, err) - } - return selector.Matches(templateLabels), nil -} diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_test_helpers_test.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_test_helpers_test.go index 8eb20256..80ef17f1 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_test_helpers_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_test_helpers_test.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "testing" + "time" apps "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -113,6 +114,39 @@ func assertMinReadyInflated(t *testing.T, deployment *apps.Deployment) { } } +func assertMinReadyInflatedWithoutSurgeRequirement(t *testing.T, deployment *apps.Deployment) { + t.Helper() + if deployment.Spec.Strategy.Type != apps.RollingUpdateDeploymentStrategyType { + 
t.Fatalf("strategy.type = %q, want RollingUpdate", deployment.Spec.Strategy.Type) + } + if deployment.Spec.MinReadySeconds != InflatedMinReadySeconds { + t.Fatalf("minReadySeconds = %d, want %d", deployment.Spec.MinReadySeconds, InflatedMinReadySeconds) + } + if deployment.Spec.ProgressDeadlineSeconds == nil || *deployment.Spec.ProgressDeadlineSeconds != InflatedProgressDeadlineSeconds { + t.Fatalf("progressDeadlineSeconds = %v, want %d", deployment.Spec.ProgressDeadlineSeconds, InflatedProgressDeadlineSeconds) + } + if got := deployment.Spec.Strategy.RollingUpdate.MaxUnavailable; got == nil || got.IntVal != 0 { + t.Fatalf("maxUnavailable = %v, want 0", got) + } +} + +func addMinReadyOriginalAnnotations(deployment *apps.Deployment) { + if deployment.Annotations == nil { + deployment.Annotations = map[string]string{} + } + deployment.Annotations[AnnotationOriginalMinReadySeconds] = "7" + deployment.Annotations[AnnotationOriginalProgressDeadlineSeconds] = "60" + deployment.Annotations[AnnotationOriginalMaxUnavailable] = "25%" + deployment.Annotations[AnnotationOriginalMaxSurge] = "1" +} + +func appendPodObjects(objects []interface{}, pods []*corev1.Pod) []interface{} { + for _, pod := range pods { + objects = append(objects, pod) + } + return objects +} + func assertAnnotation(t *testing.T, annotations map[string]string, key, want string) { t.Helper() if got := annotations[key]; got != want { @@ -178,8 +212,9 @@ func newMinReadyUpdatedPods(deployment *apps.Deployment, rs *apps.ReplicaSet, up }, Status: corev1.PodStatus{ Conditions: []corev1.PodCondition{{ - Type: corev1.PodReady, - Status: readyCondition, + Type: corev1.PodReady, + Status: readyCondition, + LastTransitionTime: metav1.NewTime(time.Now().Add(-10 * time.Second)), }}, }, } diff --git a/pkg/controller/batchrelease/control/partitionstyle/minready_status.go b/pkg/controller/batchrelease/control/partitionstyle/minready_status.go index b3640f71..e8e17d82 100644 --- 
a/pkg/controller/batchrelease/control/partitionstyle/minready_status.go +++ b/pkg/controller/batchrelease/control/partitionstyle/minready_status.go @@ -17,18 +17,28 @@ limitations under the License. package partitionstyle import ( + "reflect" "strings" "time" + apps "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" "github.com/openkruise/rollouts/api/v1beta1" brmetrics "github.com/openkruise/rollouts/pkg/controller/batchrelease/metrics" + "github.com/openkruise/rollouts/pkg/feature" "github.com/openkruise/rollouts/pkg/util" + utilfeature "github.com/openkruise/rollouts/pkg/util/feature" ) func (rc *realBatchControlPlane) isMinReadyRelease() bool { - return rc.release.Spec.ReleasePlan.DeploymentStrategy == v1beta1.DeploymentStrategyMinReadySeconds + if rc.release == nil || !utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) { + return false + } + targetRef := rc.release.Spec.WorkloadRef + return targetRef.APIVersion == apps.SchemeGroupVersion.String() && + targetRef.Kind == reflect.TypeOf(apps.Deployment{}).Name() && + rc.release.Spec.ReleasePlan.RollingStyle == v1beta1.PartitionRollingStyle } func (rc *realBatchControlPlane) recordMinReadyNormal(condType v1beta1.RolloutConditionType, reason, message string) { @@ -38,13 +48,17 @@ func (rc *realBatchControlPlane) recordMinReadyNormal(condType v1beta1.RolloutCo previousCondition := util.GetBatchReleaseCondition(*rc.newStatus, condType) condition := util.NewRolloutCondition(condType, v1.ConditionTrue, reason, message) util.SetBatchReleaseCondition(rc.newStatus, *condition) - clearMinReadyDegraded(rc.newStatus) - rc.newStatus.Message = "" + if reason == "MinReadyFinalized" { + clearMinReadyDegraded(rc.newStatus) + rc.newStatus.Message = "" + } if reason == "MinReadyBatchReady" { observeMinReadyBatchDuration(rc.release, previousCondition) brmetrics.RecordMinReadyBatch(rc.release, brmetrics.BatchResultSuccess) } - brmetrics.ClearMinReadyStuckSeconds(rc.release, brmetrics.StuckReasonBatchReadyTimeout) + if 
reason == "MinReadyBatchReady" || reason == "MinReadyFinalized" { + brmetrics.ClearMinReadyStuckSeconds(rc.release, brmetrics.StuckReasonBatchReadyTimeout) + } rc.Event(rc.release, v1.EventTypeNormal, reason, message) } @@ -91,36 +105,41 @@ func clearMinReadyDegraded(status *v1beta1.BatchReleaseStatus) { util.SetBatchReleaseCondition(status, *condition) } +type minReadyDegradedReason struct { + metric string + event string +} + func minReadyDegradedMetricReason(message string) string { - switch { - case strings.Contains(message, "feature gate is disabled"): - return brmetrics.DegradedReasonFeatureGateDisabled - case strings.Contains(message, "MinReadyDegradedPDBIncompatible"): - return brmetrics.DegradedReasonPDBIncompatible - case strings.Contains(message, "annotation ") && strings.Contains(message, "missing"): - return brmetrics.DegradedReasonMissingAnnotations - case strings.Contains(message, "annotation ") && strings.Contains(message, "malformed"): - return brmetrics.DegradedReasonMissingAnnotations - case strings.Contains(message, "MinReadyDegradedDriftDetected"): - return brmetrics.DegradedReasonGitOpsDrift - default: - return brmetrics.DegradedReasonControllerError - } + return classifyMinReadyDegradedReason("", message).metric } func minReadyDegradedEventReason(fallback, message string) string { + return classifyMinReadyDegradedReason(fallback, message).event +} + +func classifyMinReadyDegradedReason(fallback, message string) minReadyDegradedReason { + eventReason := fallback + metricReason := brmetrics.DegradedReasonControllerError switch { case strings.Contains(message, "feature gate is disabled"): - return "MinReadyFeatureGateDisabled" - case strings.Contains(message, "MinReadyDegradedPDBIncompatible"): - return "MinReadyDegradedPDBIncompatible" + metricReason = brmetrics.DegradedReasonFeatureGateDisabled + eventReason = "MinReadyFeatureGateDisabled" case strings.Contains(message, "annotation ") && strings.Contains(message, "missing"): - return 
"MinReadyDegradedMissingAnnotations" + metricReason = brmetrics.DegradedReasonMissingAnnotations + eventReason = "MinReadyDegradedMissingAnnotations" + case strings.Contains(message, "annotation ") && strings.Contains(message, "empty"): + metricReason = brmetrics.DegradedReasonMissingAnnotations + eventReason = "MinReadyDegradedMissingAnnotations" case strings.Contains(message, "annotation ") && strings.Contains(message, "malformed"): - return "MinReadyDegradedMissingAnnotations" + metricReason = brmetrics.DegradedReasonMissingAnnotations + eventReason = "MinReadyDegradedMissingAnnotations" case strings.Contains(message, "MinReadyDegradedDriftDetected"): - return "MinReadyDegradedDriftDetected" - default: - return fallback + metricReason = brmetrics.DegradedReasonGitOpsDrift + eventReason = "MinReadyDegradedDriftDetected" + } + return minReadyDegradedReason{ + metric: metricReason, + event: eventReason, } } diff --git a/pkg/controller/batchrelease/control/partitionstyle/minready_status_test.go b/pkg/controller/batchrelease/control/partitionstyle/minready_status_test.go index 876d227c..0806e72a 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/minready_status_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/minready_status_test.go @@ -21,6 +21,7 @@ import ( "time" dto "github.com/prometheus/client_model/go" + apps "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/tools/record" @@ -28,14 +29,18 @@ import ( "github.com/openkruise/rollouts/api/v1beta1" brmetrics "github.com/openkruise/rollouts/pkg/controller/batchrelease/metrics" + "github.com/openkruise/rollouts/pkg/feature" "github.com/openkruise/rollouts/pkg/util" + utilfeature "github.com/openkruise/rollouts/pkg/util/feature" ) func TestRecordMinReadyNormalObservesBatchDuration(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") release := &v1beta1.BatchRelease{ 
ObjectMeta: metav1.ObjectMeta{Name: "duration-rollout", Namespace: "default"}, Spec: v1beta1.BatchReleaseSpec{ - ReleasePlan: v1beta1.ReleasePlan{DeploymentStrategy: v1beta1.DeploymentStrategyMinReadySeconds}, + WorkloadRef: v1beta1.ObjectRef{APIVersion: apps.SchemeGroupVersion.String(), Kind: "Deployment", Name: "demo"}, + ReleasePlan: v1beta1.ReleasePlan{RollingStyle: v1beta1.PartitionRollingStyle}, }, } status := &v1beta1.BatchReleaseStatus{} @@ -69,6 +74,48 @@ func TestRecordMinReadyNormalObservesBatchDuration(t *testing.T) { } } +func TestRecordMinReadyNormalKeepsDegradedUntilFinalize(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + release := &v1beta1.BatchRelease{ + ObjectMeta: metav1.ObjectMeta{Name: "degraded-rollout", Namespace: "default"}, + Spec: v1beta1.BatchReleaseSpec{ + WorkloadRef: v1beta1.ObjectRef{APIVersion: apps.SchemeGroupVersion.String(), Kind: "Deployment", Name: "demo"}, + ReleasePlan: v1beta1.ReleasePlan{RollingStyle: v1beta1.PartitionRollingStyle}, + }, + } + status := &v1beta1.BatchReleaseStatus{Message: "annotation missing"} + util.SetBatchReleaseCondition(status, v1beta1.RolloutCondition{ + Type: v1beta1.RolloutConditionMinReadyDegraded, + Status: v1.ConditionTrue, + Reason: "MinReadyDegradedMissingAnnotations", + }) + rc := &realBatchControlPlane{ + EventRecorder: record.NewFakeRecorder(2), + release: release, + newStatus: status, + } + + rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatching", "MinReadySeconds strategy advanced the current batch") + + degraded := util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyDegraded) + if degraded == nil || degraded.Status != v1.ConditionTrue { + t.Fatalf("degraded condition = %v, want still true after batching", degraded) + } + if status.Message != "annotation missing" { + t.Fatalf("status.message = %q, want previous degraded message", status.Message) + } + + 
rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyFinalized, "MinReadyFinalized", "MinReadySeconds strategy finalized") + + degraded = util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyDegraded) + if degraded == nil || degraded.Status != v1.ConditionFalse { + t.Fatalf("degraded condition = %v, want false after finalize", degraded) + } + if status.Message != "" { + t.Fatalf("status.message = %q, want empty after finalize", status.Message) + } +} + func TestObserveMinReadyBatchWaitSetsStuckGauge(t *testing.T) { release := &v1beta1.BatchRelease{ ObjectMeta: metav1.ObjectMeta{Name: "stuck-rollout", Namespace: "default"}, diff --git a/pkg/controller/batchrelease/metrics/minready_metrics.go b/pkg/controller/batchrelease/metrics/minready_metrics.go index f90ef6c3..6bc85069 100644 --- a/pkg/controller/batchrelease/metrics/minready_metrics.go +++ b/pkg/controller/batchrelease/metrics/minready_metrics.go @@ -34,7 +34,6 @@ const ( DegradedReasonFeatureGateDisabled = "feature_gate_disabled" DegradedReasonGitOpsDrift = "gitops_drift" DegradedReasonMissingAnnotations = "missing_annotations" - DegradedReasonPDBIncompatible = "pdb_incompatible" StuckReasonBatchReadyTimeout = "batch_ready_timeout" ) diff --git a/pkg/controller/batchrelease/metrics/minready_metrics_test.go b/pkg/controller/batchrelease/metrics/minready_metrics_test.go index 4e09f6fb..c0beac37 100644 --- a/pkg/controller/batchrelease/metrics/minready_metrics_test.go +++ b/pkg/controller/batchrelease/metrics/minready_metrics_test.go @@ -39,7 +39,7 @@ func TestMinReadyMetricsRecorders(t *testing.T) { ObserveMinReadyBatchDuration(release, 2*time.Second) SetMinReadyStuckSeconds(release, StuckReasonBatchReadyTimeout, 3) ClearMinReadyStuckSeconds(release, StuckReasonBatchReadyTimeout) - RecordMinReadyDegraded(release, DegradedReasonPDBIncompatible) + RecordMinReadyDegraded(release, DegradedReasonControllerError) assertCounterPositive(t, minReadyBatchesTotal.WithLabelValues("rollout-a", "default", 
BatchResultSuccess)) histogram, ok := minReadyBatchDurationSeconds.WithLabelValues("rollout-a", "default").(prometheus.Metric) @@ -48,7 +48,7 @@ func TestMinReadyMetricsRecorders(t *testing.T) { } assertHistogramCountPositive(t, histogram) assertGaugeValue(t, minReadyStuckSeconds.WithLabelValues("rollout-a", "default", StuckReasonBatchReadyTimeout), 0) - assertCounterPositive(t, minReadyDegradedTotal.WithLabelValues("rollout-a", "default", DegradedReasonPDBIncompatible)) + assertCounterPositive(t, minReadyDegradedTotal.WithLabelValues("rollout-a", "default", DegradedReasonControllerError)) } func assertCounterPositive(t *testing.T, metric interface{ Write(*dto.Metric) error }) { diff --git a/pkg/controller/rollout/rollout_canary.go b/pkg/controller/rollout/rollout_canary.go index 9969737b..0cecbac2 100644 --- a/pkg/controller/rollout/rollout_canary.go +++ b/pkg/controller/rollout/rollout_canary.go @@ -411,7 +411,6 @@ func (m *canaryReleaseManager) createBatchRelease(rollout *v1beta1.Rollout, roll FailureThreshold: rollout.Spec.Strategy.Canary.FailureThreshold, PatchPodTemplateMetadata: rollout.Spec.Strategy.Canary.PatchPodTemplateMetadata, RollingStyle: rollout.Spec.Strategy.GetRollingStyle(), - DeploymentStrategy: rollout.Spec.Strategy.Canary.DeploymentStrategy, EnableExtraWorkloadForCanary: rollout.Spec.Strategy.Canary.EnableExtraWorkloadForCanary, }, }, diff --git a/pkg/controller/rollout/rollout_releaseManager_test.go b/pkg/controller/rollout/rollout_releaseManager_test.go index 69d118ec..6f8e1343 100644 --- a/pkg/controller/rollout/rollout_releaseManager_test.go +++ b/pkg/controller/rollout/rollout_releaseManager_test.go @@ -342,26 +342,6 @@ func (m *mockReleaseManager) createBatchRelease(rollout *v1beta1.Rollout, rollou return br } -func TestCanaryReleaseManagerPassesDeploymentStrategyToBatchRelease(t *testing.T) { - rollout := &v1beta1.Rollout{ - ObjectMeta: metav1.ObjectMeta{Name: "my-rollout", Namespace: "default"}, - Spec: v1beta1.RolloutSpec{ - 
WorkloadRef: v1beta1.ObjectRef{APIVersion: "apps/v1", Kind: "Deployment", Name: "my-app"}, - Strategy: v1beta1.RolloutStrategy{ - Canary: &v1beta1.CanaryStrategy{ - DeploymentStrategy: v1beta1.DeploymentStrategyMinReadySeconds, - Steps: []v1beta1.CanaryStep{ - {Replicas: &intstr.IntOrString{Type: intstr.String, StrVal: "10%"}}, - }, - }, - }, - }, - } - - br := (&canaryReleaseManager{}).createBatchRelease(rollout, "rollout-id", 0, false) - assert.Equal(t, v1beta1.DeploymentStrategyMinReadySeconds, br.Spec.ReleasePlan.DeploymentStrategy) -} - func TestRunBatchRelease(t *testing.T) { rollout := &v1beta1.Rollout{ ObjectMeta: metav1.ObjectMeta{Name: "my-rollout", Namespace: "default", UID: "rollout-uid-12345"}, diff --git a/pkg/webhook/workload/mutating/workload_update_handler.go b/pkg/webhook/workload/mutating/workload_update_handler.go index ced5d3cf..128f796e 100644 --- a/pkg/webhook/workload/mutating/workload_update_handler.go +++ b/pkg/webhook/workload/mutating/workload_update_handler.go @@ -41,6 +41,7 @@ import ( appsv1alpha1 "github.com/openkruise/rollouts/api/v1alpha1" appsv1beta1 "github.com/openkruise/rollouts/api/v1beta1" + partitiondeployment "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/deployment" "github.com/openkruise/rollouts/pkg/feature" "github.com/openkruise/rollouts/pkg/util" utilclient "github.com/openkruise/rollouts/pkg/util/client" @@ -241,7 +242,7 @@ func (h *WorkloadHandler) handleDeployment(newObj, oldObj *apps.Deployment) (boo if newObj.Annotations[util.InRolloutProgressingAnnotation] != "" { modified := false if shouldSkipRecreateMutationForMinReady(rollout) { - return false, nil + return enforceMinReadyInflation(newObj), nil } strategy := util.GetDeploymentStrategy(newObj) // partition @@ -460,10 +461,36 @@ func isEffectiveDeploymentRevisionChange(oldObj, newObj *apps.Deployment) bool { func shouldSkipRecreateMutationForMinReady(rollout *appsv1beta1.Rollout) bool { return rollout.Spec.Strategy.Canary != 
nil && - rollout.Spec.Strategy.Canary.DeploymentStrategy == appsv1beta1.DeploymentStrategyMinReadySeconds && + !rollout.Spec.Strategy.Canary.EnableExtraWorkloadForCanary && utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) } +func enforceMinReadyInflation(deployment *apps.Deployment) bool { + if !hasMinReadyOriginalAnnotations(deployment.Annotations) { + return false + } + modified := false + if deployment.Spec.MinReadySeconds != partitiondeployment.InflatedMinReadySeconds { + deployment.Spec.MinReadySeconds = partitiondeployment.InflatedMinReadySeconds + modified = true + } + if deployment.Spec.ProgressDeadlineSeconds == nil || *deployment.Spec.ProgressDeadlineSeconds != partitiondeployment.InflatedProgressDeadlineSeconds { + progressDeadlineSeconds := partitiondeployment.InflatedProgressDeadlineSeconds + deployment.Spec.ProgressDeadlineSeconds = &progressDeadlineSeconds + modified = true + } + return modified +} + +func hasMinReadyOriginalAnnotations(annotations map[string]string) bool { + for _, key := range partitiondeployment.AllOriginalAnnotations { + if _, ok := annotations[key]; ok { + return true + } + } + return false +} + func setDeploymentStrategyAnnotation(strategy appsv1alpha1.DeploymentStrategy, d *apps.Deployment) { strategyAnno, _ := json.Marshal(&strategy) d.Annotations[appsv1alpha1.DeploymentStrategyAnnotation] = string(strategyAnno) diff --git a/pkg/webhook/workload/mutating/workload_update_handler_test.go b/pkg/webhook/workload/mutating/workload_update_handler_test.go index 788e3df9..43b54afd 100644 --- a/pkg/webhook/workload/mutating/workload_update_handler_test.go +++ b/pkg/webhook/workload/mutating/workload_update_handler_test.go @@ -442,9 +442,7 @@ func TestHandlerDeployment(t *testing.T) { }, getRollout: func() *appsv1beta1.Rollout { _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") - obj := rolloutDemo.DeepCopy() - obj.Spec.Strategy.Canary.DeploymentStrategy = 
appsv1beta1.DeploymentStrategyMinReadySeconds - return obj + return rolloutDemo.DeepCopy() }, }, { @@ -604,9 +602,7 @@ func TestHandlerDeployment(t *testing.T) { }, getRollout: func() *appsv1beta1.Rollout { _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") - obj := rolloutDemo.DeepCopy() - obj.Spec.Strategy.Canary.DeploymentStrategy = appsv1beta1.DeploymentStrategyMinReadySeconds - return obj + return rolloutDemo.DeepCopy() }, }, { @@ -632,9 +628,7 @@ func TestHandlerDeployment(t *testing.T) { }, getRollout: func() *appsv1beta1.Rollout { _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") - obj := rolloutDemo.DeepCopy() - obj.Spec.Strategy.Canary.DeploymentStrategy = appsv1beta1.DeploymentStrategyMinReadySeconds - return obj + return rolloutDemo.DeepCopy() }, }, { @@ -840,6 +834,7 @@ func TestHandlerDeployment(t *testing.T) { decoder := admission.NewDecoder(scheme) for _, cs := range cases { t.Run(cs.name, func(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=false") client := fake.NewClientBuilder().WithScheme(scheme).Build() h := WorkloadHandler{ Client: client, @@ -876,7 +871,6 @@ func TestHandlerDeployment(t *testing.T) { func TestShouldSkipRecreateMutationForMinReady(t *testing.T) { rollout := rolloutDemo.DeepCopy() - rollout.Spec.Strategy.Canary.DeploymentStrategy = appsv1beta1.DeploymentStrategyMinReadySeconds _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=false") if shouldSkipRecreateMutationForMinReady(rollout) { t.Fatalf("skip returned true while feature gate is disabled") @@ -885,9 +879,9 @@ func TestShouldSkipRecreateMutationForMinReady(t *testing.T) { if !shouldSkipRecreateMutationForMinReady(rollout) { t.Fatalf("skip returned false for MinReadySeconds with feature gate enabled") } - rollout.Spec.Strategy.Canary.DeploymentStrategy = 
appsv1beta1.DeploymentStrategyRecreate + rollout.Spec.Strategy.Canary.EnableExtraWorkloadForCanary = true if shouldSkipRecreateMutationForMinReady(rollout) { - t.Fatalf("skip returned true for Recreate strategy") + t.Fatalf("skip returned true for canary-style rollout") } } diff --git a/test/e2e/deployment_minready_helpers_test.go b/test/e2e/deployment_minready_helpers_test.go index f72d39a3..bb10f534 100644 --- a/test/e2e/deployment_minready_helpers_test.go +++ b/test/e2e/deployment_minready_helpers_test.go @@ -78,7 +78,6 @@ func newMinReadyE2ERollout(namespace string) *v1beta1.Rollout { Spec: v1beta1.RolloutSpec{ WorkloadRef: v1beta1.ObjectRef{APIVersion: "apps/v1", Kind: "Deployment", Name: minReadyE2EDeploymentName}, Strategy: v1beta1.RolloutStrategy{Canary: &v1beta1.CanaryStrategy{ - DeploymentStrategy: v1beta1.DeploymentStrategyMinReadySeconds, EnableExtraWorkloadForCanary: false, Steps: []v1beta1.CanaryStep{ {Replicas: intstrPtr(intstr.FromString("20%")), Pause: v1beta1.RolloutPause{}}, diff --git a/test/e2e/deployment_minready_pdb_test.go b/test/e2e/deployment_minready_pdb_test.go index 4f5b8d45..4e0dbefb 100644 --- a/test/e2e/deployment_minready_pdb_test.go +++ b/test/e2e/deployment_minready_pdb_test.go @@ -50,8 +50,8 @@ var _ = SIGDescribe("Deployment MinReadySeconds PDB", func() { time.Sleep(3 * time.Second) }) - KruiseDescribe("MinReadySeconds PDB guard", func() { - It("records degraded status and leaves Deployment strategy untouched", func() { + KruiseDescribe("MinReadySeconds PDB coexistence", func() { + It("continues rollout initialization and leaves Deployment strategy untouched", func() { rollout := newMinReadyE2ERollout(namespace) deployment := newMinReadyE2EDeployment(namespace) pdb := newMinReadyE2EPDB(namespace) @@ -62,12 +62,12 @@ var _ = SIGDescribe("Deployment MinReadySeconds PDB", func() { waitMinReadyE2EDeploymentReady(namespace) waitMinReadyE2ERolloutPhase(namespace, rollout.Name, v1beta1.RolloutPhaseHealthy) 
updateMinReadyE2EDeploymentVersion(namespace, "version2") - waitMinReadyE2EBatchCondition(namespace, rollout.Name, "MinReadyDegradedPDBIncompatible") + waitMinReadyE2EBatchCondition(namespace, rollout.Name, "MinReadyInitialized") got := &apps.Deployment{} Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(deployment), got)).Should(Succeed()) Expect(got.Spec.Strategy.Type).Should(Equal(apps.RollingUpdateDeploymentStrategyType)) - Expect(got.Spec.MinReadySeconds).ShouldNot(Equal(partitiondeployment.InflatedMinReadySeconds)) + Expect(got.Spec.MinReadySeconds).Should(Equal(partitiondeployment.InflatedMinReadySeconds)) }) }) }) diff --git a/test/integration/deployment_minready_test.go b/test/integration/deployment_minready_test.go index 84013417..99016971 100644 --- a/test/integration/deployment_minready_test.go +++ b/test/integration/deployment_minready_test.go @@ -24,6 +24,7 @@ import ( policyv1 "k8s.io/api/policy/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/tools/record" + "sigs.k8s.io/controller-runtime/pkg/client" "github.com/openkruise/rollouts/api/v1beta1" partitiondeployment "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/deployment" @@ -51,7 +52,6 @@ func TestDeploymentMinReadyControlPlaneInitialize(t *testing.T) { assertOriginalAnnotation(t, got, partitiondeployment.AnnotationOriginalMaxUnavailable, "25%") assertOriginalAnnotation(t, got, partitiondeployment.AnnotationOriginalMaxSurge, "1") assertIntegrationCondition(t, status, v1beta1.RolloutConditionMinReadyInitialized, corev1.ConditionTrue, "MinReadyInitialized") - assertIntegrationCondition(t, status, v1beta1.RolloutConditionMinReadyDegraded, corev1.ConditionFalse, "MinReadyHealthy") assertIntegrationEvent(t, recorder, "MinReadyInitialized") } @@ -68,16 +68,9 @@ func TestDeploymentMinReadyControlPlaneRejectsFeatureGateDisabled(t *testing.T) if err == nil || !strings.Contains(err.Error(), "feature gate is disabled") { 
t.Fatalf("Initialize error = %v, want feature gate disabled", err) } - - got := fetchIntegrationDeployment(t, cli, deployment) - if got.Spec.MinReadySeconds != deployment.Spec.MinReadySeconds { - t.Fatalf("minReadySeconds = %d, want unchanged %d", got.Spec.MinReadySeconds, deployment.Spec.MinReadySeconds) - } - assertIntegrationCondition(t, status, v1beta1.RolloutConditionMinReadyDegraded, corev1.ConditionTrue, "MinReadyFeatureGateDisabled") - assertIntegrationEvent(t, recorder, "MinReadyFeatureGateDisabled") } -func TestDeploymentMinReadyControlPlaneRejectsCoveringPDB(t *testing.T) { +func TestDeploymentMinReadyControlPlaneAllowsCoveringPDB(t *testing.T) { _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") release := newIntegrationMinReadyRelease() deployment := newIntegrationDeployment() @@ -92,17 +85,13 @@ func TestDeploymentMinReadyControlPlaneRejectsCoveringPDB(t *testing.T) { status := release.Status.DeepCopy() control := newIntegrationMinReadyControl(cli, recorder, release, status, deployment.Name) - err := control.Initialize() - if err == nil || !strings.Contains(err.Error(), partitiondeployment.EventDegradedPDBIncompatible) { - t.Fatalf("Initialize error = %v, want PDB incompatible", err) + if err := control.Initialize(); err != nil { + t.Fatalf("Initialize failed: %v", err) } got := fetchIntegrationDeployment(t, cli, deployment) - if got.Spec.MinReadySeconds != deployment.Spec.MinReadySeconds { - t.Fatalf("minReadySeconds = %d, want unchanged %d", got.Spec.MinReadySeconds, deployment.Spec.MinReadySeconds) - } - assertIntegrationCondition(t, status, v1beta1.RolloutConditionMinReadyDegraded, corev1.ConditionTrue, "MinReadyDegradedPDBIncompatible") - assertIntegrationEvent(t, recorder, "MinReadyDegradedPDBIncompatible") + assertInflatedDeployment(t, got) + assertIntegrationCondition(t, status, v1beta1.RolloutConditionMinReadyInitialized, corev1.ConditionTrue, "MinReadyInitialized") } func 
TestDeploymentMinReadyControlPlaneUpgradeBatchUsesUpdatedReadyReplicas(t *testing.T) { @@ -114,8 +103,9 @@ func TestDeploymentMinReadyControlPlaneUpgradeBatchUsesUpdatedReadyReplicas(t *t deployment.Status.UpdatedReplicas = 5 deployment.Status.ReadyReplicas = 5 rs := newIntegrationUpdatedReplicaSet(deployment, release.Status.UpdateRevision, 5, 5) + pods := newIntegrationUpdatedPods(deployment, rs, release.Status.UpdateRevision, "", 5, 5) recorder := record.NewFakeRecorder(20) - cli := newIntegrationClient(release, deployment, rs) + cli := newIntegrationClient(appendIntegrationObjects([]client.Object{release, deployment, rs}, pods)...) status := release.Status.DeepCopy() control := newIntegrationMinReadyControl(cli, recorder, release, status, deployment.Name) @@ -143,8 +133,9 @@ func TestDeploymentMinReadyControlPlaneWaitsForUpdatedReadyReplicas(t *testing.T deployment.Status.UpdatedReplicas = 5 deployment.Status.ReadyReplicas = 10 rs := newIntegrationUpdatedReplicaSet(deployment, release.Status.UpdateRevision, 5, 1) + pods := newIntegrationUpdatedPods(deployment, rs, release.Status.UpdateRevision, "", 5, 1) recorder := record.NewFakeRecorder(20) - cli := newIntegrationClient(release, deployment, rs) + cli := newIntegrationClient(appendIntegrationObjects([]client.Object{release, deployment, rs}, pods)...) 
status := release.Status.DeepCopy() control := newIntegrationMinReadyControl(cli, recorder, release, status, deployment.Name) diff --git a/test/integration/minready_helpers_test.go b/test/integration/minready_helpers_test.go index ede126c9..415736c3 100644 --- a/test/integration/minready_helpers_test.go +++ b/test/integration/minready_helpers_test.go @@ -20,6 +20,7 @@ import ( "context" "strings" "testing" + "time" apps "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -59,8 +60,7 @@ func newIntegrationMinReadyRelease() *v1beta1.BatchRelease { Spec: v1beta1.BatchReleaseSpec{ WorkloadRef: v1beta1.ObjectRef{APIVersion: apps.SchemeGroupVersion.String(), Kind: "Deployment", Name: "demo"}, ReleasePlan: v1beta1.ReleasePlan{ - RollingStyle: v1beta1.PartitionRollingStyle, - DeploymentStrategy: v1beta1.DeploymentStrategyMinReadySeconds, + RollingStyle: v1beta1.PartitionRollingStyle, Batches: []v1beta1.ReleaseBatch{ {CanaryReplicas: intstr.FromString("20%")}, {CanaryReplicas: intstr.FromString("50%")}, @@ -180,6 +180,50 @@ func newIntegrationUpdatedReplicaSet(deployment *apps.Deployment, updateRevision } } +func newIntegrationUpdatedPods(deployment *apps.Deployment, rs *apps.ReplicaSet, updateRevision, rolloutID string, total, ready int) []*corev1.Pod { + pods := make([]*corev1.Pod, 0, total) + for i := 0; i < total; i++ { + readyCondition := corev1.ConditionFalse + if i < ready { + readyCondition = corev1.ConditionTrue + } + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: deployment.Name + "-pod-" + string(rune('a'+i)), + Namespace: deployment.Namespace, + Labels: map[string]string{ + "app": "demo", + apps.DefaultDeploymentUniqueLabelKey: updateRevision, + }, + OwnerReferences: []metav1.OwnerReference{{ + APIVersion: apps.SchemeGroupVersion.String(), + Kind: "ReplicaSet", + Name: rs.Name, + UID: rs.UID, + Controller: pointer.Bool(true), + }}, + }, + Status: corev1.PodStatus{Conditions: []corev1.PodCondition{{ + Type: corev1.PodReady, + Status: readyCondition, 
+ LastTransitionTime: metav1.NewTime(time.Now().Add(-10 * time.Second)), + }}}, + } + if rolloutID != "" { + pod.Labels[v1beta1.RolloutIDLabel] = rolloutID + } + pods = append(pods, pod) + } + return pods +} + +func appendIntegrationObjects(objects []client.Object, pods []*corev1.Pod) []client.Object { + for _, pod := range pods { + objects = append(objects, pod) + } + return objects +} + func newIntegrationMinReadyControl( cli client.Client, recorder record.EventRecorder, From 9565a584bf1e718bf94c4b8cd200b35458dffb89 Mon Sep 17 00:00:00 2001 From: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> Date: Wed, 10 Jun 2026 14:50:37 +0800 Subject: [PATCH 04/22] fix: write BatchReleaseControlAnnotation and clean up finalize labels Initialize now writes BatchReleaseControlAnnotation to mark the Deployment as controlled by a specific BatchRelease, consistent with the CloneSet batch release pattern. Finalize now cleans up both BatchReleaseControlAnnotation and DeploymentStableRevisionLabel to ensure the Deployment is fully released after rollout completes. Also removes premature user-facing docs (quickstart, migration guide, runbook) that will be added in a follow-up after the feature stabilizes. 
Signed-off-by: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> --- .../deployment_min_ready_quickstart.md | 156 ------------------ docs/migration/recreate_to_minready.md | 83 ---------- docs/operator/deployment_min_ready_runbook.md | 134 --------------- .../deployment/minready_control.go | 12 +- .../deployment/minready_control_test.go | 1 + .../deployment/minready_finalize_test.go | 13 ++ 6 files changed, 25 insertions(+), 374 deletions(-) delete mode 100644 docs/getting_started/deployment_min_ready_quickstart.md delete mode 100644 docs/migration/recreate_to_minready.md delete mode 100644 docs/operator/deployment_min_ready_runbook.md diff --git a/docs/getting_started/deployment_min_ready_quickstart.md b/docs/getting_started/deployment_min_ready_quickstart.md deleted file mode 100644 index 4ec4360c..00000000 --- a/docs/getting_started/deployment_min_ready_quickstart.md +++ /dev/null @@ -1,156 +0,0 @@ -# Deployment MinReadySeconds Quickstart - -This guide shows how to enable the native Deployment MinReadySeconds rollout strategy in Kruise Rollouts. - -## What this strategy does - -MinReadySeconds keeps native Kubernetes `Deployment.spec.strategy.type` unchanged and relies on inflated rollout fields plus Kruise Rollouts orchestration to advance batches. It is intended for users who want controlled, batch-based rollout behavior without switching the workload to Recreate. - -The controller writes and later restores these original Deployment fields: - -- `spec.minReadySeconds` -- `spec.progressDeadlineSeconds` -- `spec.strategy.rollingUpdate.maxUnavailable` -- `spec.strategy.rollingUpdate.maxSurge` - -The feature gate is `MinReadySecondsStrategy` and it is disabled by default. 
- -## When to use - -Use this strategy when: - -- you want native Deployment semantics to stay in place -- you need batch-based rollout control -- you want the controller to restore the original Deployment fields automatically - -Do not use it when: - -- a PodDisruptionBudget covers the target workload -- you need a traffic-routing canary instead of a native Deployment rollout -- you cannot tolerate long Ready-but-not-Available periods during rollout - -## Before you start - -- Kubernetes cluster with Kruise Rollouts installed -- A `Deployment` managed by a `Rollout` -- `MinReadySecondsStrategy=true` enabled in the controller feature gate -- No PodDisruptionBudget covering the target Deployment namespace and selector - -If a matching PDB exists, initialization is rejected and the rollout enters `MinReadyDegraded`. - -## Minimal rollout example - -```yaml -apiVersion: rollouts.kruise.io/v1beta1 -kind: Rollout -metadata: - name: demo-rollout -spec: - strategy: - canary: - deploymentStrategy: MinReadySeconds - steps: - - replicas: 20% - - replicas: 50% - - replicas: 100% -``` - -The associated Deployment should keep a normal RollingUpdate strategy. Kruise Rollouts will inflate the live fields during rollout and restore the original values on finalize. - -Example workload: - -```yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: demo-deploy -spec: - replicas: 3 - selector: - matchLabels: - app: demo - template: - metadata: - labels: - app: demo - spec: - containers: - - name: app - image: nginx:1.27 -``` - -## Enable the feature gate - -Set the controller feature gate to: - -```bash -MinReadySecondsStrategy=true -``` - -Without this gate, the controller rejects MinReadySeconds rollouts and records a warning event. - -## Five minute smoke test - -1. Apply the Rollout and Deployment. -2. Enable `MinReadySecondsStrategy=true` on the controller. -3. Update the Deployment image. -4. Watch the Rollout status and the Deployment annotations. -5. 
Confirm the rollout eventually reaches `MinReadyFinalized`. - -## Verify the rollout - -After the rollout starts: - -```bash -kubectl get rollout demo-rollout -kubectl get deploy demo-deploy -o yaml -kubectl describe rollout demo-rollout -``` - -Expected behavior: - -- the Deployment gets annotated with the original rollout fields -- `minReadySeconds` is inflated to the MaxReadySeconds value -- `progressDeadlineSeconds` is inflated to the MaxProgressSeconds value -- `maxUnavailable` is driven batch-by-batch -- `maxSurge` is kept at `0` - -## What to look for - -- `MinReadyInitialized` means the Deployment was initialized successfully -- `MinReadyBatching` means batches are progressing -- `MinReadyFinalized` means the original Deployment fields were restored -- `MinReadyDegraded` means the controller hit a blocking condition - -## Recreate comparison - -Compared with the old Recreate-style behavior: - -- MinReadySeconds does not change `Deployment.spec.strategy.type` -- original rollout fields are stored and restored explicitly -- batch progression is driven by readiness on inflated fields -- Recreate-style mutation is skipped when the feature is enabled - -## FAQ - -### How do I enable the feature gate? - -Set `MinReadySecondsStrategy=true` on the controller. - -### Why does the rollout fail immediately? - -The most common reasons are a disabled feature gate or a matching PDB. - -### Can I use this with a Service Mesh? - -Yes, but only if the mesh does not rely on mutating the Deployment strategy type or blocking readiness in a way that conflicts with the inflated rollout fields. - -### Why does Available stay false for a long time? - -That is expected. The strategy intentionally inflates `minReadySeconds` so the controller can control rollout progression by batch. - -## Notes - -- The strategy does not modify `Deployment.spec.strategy.type`. -- PDB-covered workloads are blocked in alpha. -- Existing annotations are treated as live state. 
Missing or partial original annotations are not considered a success path. diff --git a/docs/migration/recreate_to_minready.md b/docs/migration/recreate_to_minready.md deleted file mode 100644 index 968ac4b0..00000000 --- a/docs/migration/recreate_to_minready.md +++ /dev/null @@ -1,83 +0,0 @@ -# Migration from Recreate to MinReadySeconds - -This guide explains how to move an existing rollout from the legacy Recreate-based flow to the MinReadySeconds strategy. - -## What changes - -With MinReadySeconds: - -- the Deployment stays on native RollingUpdate -- the controller uses inflated rollout fields instead of switching `strategy.type` -- original rollout fields are stored in annotations and restored later - -This is different from the older Recreate behavior. - -## Behavior comparison - -| Topic | Recreate | MinReadySeconds | -|-------|----------|-----------------| -| `Deployment.spec.strategy.type` | Switched to Recreate during rollout | Left unchanged | -| Rollout control | Full stop-and-replace behavior | Batch-based progression | -| Original fields | Not restored from annotations | Stored and restored | -| PDB compatibility | Depends on workload flow | Blocked in alpha | -| Operational risk | Simpler but more invasive | Less invasive but stricter on readiness | - -## Compatibility checklist - -Before migrating, confirm: - -- the controller feature gate `MinReadySecondsStrategy` is enabled -- the target workload is not covered by a matching PDB -- the rollout spec uses `deploymentStrategy: MinReadySeconds` -- the workload can tolerate long Ready-but-not-Available periods during rollout -- your GitOps tool will not continuously fight the inflated rollout fields -- HPA is either disabled for the rollout or understood well enough to accept batch recalculation - -## Migration steps - -1. Pick a single namespace for the first trial. -2. Update the Rollout spec to use `deploymentStrategy: MinReadySeconds`. -3. 
Enable the `MinReadySecondsStrategy` feature gate on the controller. -4. Reconcile the rollout once so the controller writes the original annotations. -5. Verify that the live Deployment fields are inflated. -6. Watch the rollout status until `MinReadyFinalized`. -7. Roll the change out to other namespaces only after the first one is stable. - -## Expected controller behavior - -The executor routes MinReadySeconds rollouts to the MinReady controller. -The webhook keeps the Deployment strategy type unchanged for this path. -The controller updates batch state by patching `maxUnavailable` only. - -## Rollout plan for an existing service - -If you are moving a production service: - -- start with one non-critical namespace -- watch events and status conditions during the first rollout -- confirm the Deployment annotations are written and later removed -- verify that your GitOps reconciler is not reverting `maxUnavailable` -- verify that HPA is not introducing surprise replica swings during the test rollout - -## Rollback - -To roll back, switch the Rollout spec back to the Recreate strategy and let the controller reconcile the workload back to its original fields. - -If the rollout is already degraded, resolve the blocking cause first: - -- enable the feature gate if it was disabled -- remove the overlapping PDB -- repair missing or malformed original annotations -- restore live Deployment fields that drifted out of the inflated state - -## Known limitations - -- PDB-covered workloads are blocked in alpha. -- Any direct manual edit of the inflated Deployment fields can move the rollout into `MinReadyDegraded`. -- This migration is only appropriate when you want the controller to preserve the native Deployment strategy type. - -## Notes - -- Do not migrate workloads with a covering PDB unless the strategy is redesigned for that topology. -- Do not change `Deployment.spec.strategy.type` manually during the migration. 
-- Keep the original annotations intact until finalize completes. diff --git a/docs/operator/deployment_min_ready_runbook.md b/docs/operator/deployment_min_ready_runbook.md deleted file mode 100644 index 2c23731d..00000000 --- a/docs/operator/deployment_min_ready_runbook.md +++ /dev/null @@ -1,134 +0,0 @@ -# Deployment MinReadySeconds Runbook - -This page is for operators who need to inspect, support, or recover a MinReadySeconds rollout. - -## Quick status map - -| Condition | Meaning | Operator action | -|-----------|---------|-----------------| -| `MinReadyInitialized` | Original values are stored and live fields are inflated | Start watching batches | -| `MinReadyBatching` | A batch is in progress or waiting on readiness | Inspect pods and rollout status | -| `MinReadyDegraded` | The controller stopped on an explicit blocking issue | Follow the relevant recovery path | -| `MinReadyFinalized` | The original Deployment fields were restored | No action needed | - -## Normal lifecycle - -1. `Initialize` stores the original Deployment fields in annotations. -2. The controller inflates rollout fields and advances each batch. -3. `Finalize` restores the original fields and removes the annotations. - -The rollout status uses these conditions: - -- `MinReadyInitialized` -- `MinReadyBatching` -- `MinReadyDegraded` -- `MinReadyFinalized` - -## Common events - -- `MinReadyInitialized` -- `MinReadyBatchUpgraded` -- `MinReadyFinalized` -- `MinReadyDegradedMissingAnnotations` -- `MinReadyDegradedDriftDetected` -- `MinReadyDegradedPDBIncompatible` -- `MinReadyFeatureGateDisabled` - -The current implementation does not emit a dedicated `MinReadyBatchStuck` event. Use the `MinReadyBatching` condition together with `rollout_minready_stuck_seconds` to detect long waits. - -## Degraded states - -`MinReadyDegraded` means the controller stopped because the rollout cannot safely continue. 
- -Typical causes: - -- feature gate disabled -- original annotations missing or partial -- live Deployment fields no longer match the inflated MinReadySeconds state -- PDB selector matches the Deployment pods - -## Troubleshooting matrix - -| Reason | Diagnostic command | What to look for | -|--------|--------------------|------------------| -| Feature gate disabled | `kubectl describe rollout ` | Warning event with `MinReadyFeatureGateDisabled` | -| Missing annotations | `kubectl get deploy -o yaml` | One or more original annotations absent | -| Drift detected | `kubectl get deploy -o yaml` | `minReadySeconds`, `progressDeadlineSeconds`, or `maxSurge` no longer match inflated values | -| PDB conflict | `kubectl get pdb -n -o yaml` | Selector matches the workload labels | -| Batch waiting too long | `kubectl get rollout -o yaml` and metrics | `MinReadyBatching` stays true and `rollout_minready_stuck_seconds` remains above zero | - -## PDB incompatibility - -If a matching PodDisruptionBudget exists for the target Deployment, initialization is rejected. - -This is intentional in alpha. Do not try to work around it by manually forcing batch progression. - -## Break-glass flow - -Use this when the rollout is degraded and you need to recover quickly: - -1. Identify the blocking reason from events and conditions. -2. Fix the root cause instead of patching around it. -3. Reconcile the Rollout again. -4. Confirm the rollout returns to `MinReadyInitialized`, `MinReadyBatching`, or `MinReadyFinalized`. - -If the original annotations were removed accidentally, restore them before the next reconcile. Do not write default values by hand unless the original values were truly default. 
- -## Drift detection - -The controller treats these as drift: - -- `spec.minReadySeconds` no longer equals the inflated value -- `spec.progressDeadlineSeconds` no longer equals the inflated value -- `spec.strategy.rollingUpdate.maxSurge` is no longer `0` -- original annotations are partially missing - -When drift is detected, the rollout enters `MinReadyDegraded` and emits `MinReadyDegradedDriftDetected`. - -## Inspecting a live rollout - -```bash -kubectl get rollout -o yaml -kubectl describe rollout -kubectl get deploy -o yaml -kubectl get pdb -n -``` - -Check these fields: - -- annotations under `metadata.annotations` -- `status.conditions` -- the current batch and replica counts - -## Recovery - -Recovery depends on the cause. - -- If the feature gate was disabled, enable `MinReadySecondsStrategy` and retry the rollout. -- If a PDB matches, remove the PDB or move the workload to a non-overlapping selector. -- If annotations are missing or the live fields drifted, treat the Deployment as damaged and re-create the rollout state from the desired spec. -- If the rollout is waiting on batch readiness, inspect `status.conditions`, the Deployment replica counts, and the current batch in the Rollout status before taking action. - -## Finalization - -Finalize restores: - -- `minReadySeconds` -- `progressDeadlineSeconds` -- `maxUnavailable` -- `maxSurge` - -If finalize fails, the controller reports `MinReadyDegraded` and keeps the annotations until the blocking issue is resolved. - -## Common log patterns - -- `MinReadyControl.Initialize` failures usually point to feature gate, PDB, or annotation problems. -- `MinReadyControl.UpgradeBatch[...]` failures usually point to drift or stale workload state. -- `MinReadyControl.Finalize` failures usually point to missing annotations or malformed annotation values. - -## Monitoring suggestions - -- Alert when `MinReadyDegraded` stays true for more than one reconcile window. 
-- Alert when `rollout_minready_degraded_total` increases for the same rollout. -- Track `rollout_minready_batch_duration_seconds` for batch completion latency. -- Track `rollout_minready_stuck_seconds` to spot batches that are still waiting on readiness. diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go index a0e650a0..acde95d8 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go @@ -21,13 +21,16 @@ import ( "fmt" apps "k8s.io/api/apps/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" "sigs.k8s.io/controller-runtime/pkg/client" + "github.com/openkruise/rollouts/api/v1alpha1" "github.com/openkruise/rollouts/api/v1beta1" batchcontext "github.com/openkruise/rollouts/pkg/controller/batchrelease/context" "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle" "github.com/openkruise/rollouts/pkg/feature" + "github.com/openkruise/rollouts/pkg/util" utilfeature "github.com/openkruise/rollouts/pkg/util/feature" ) @@ -50,7 +53,10 @@ func (mc *MinReadyControl) BuildController() (partitionstyle.Interface, error) { return &MinReadyControl{realController: rc}, nil } -func (mc *MinReadyControl) Initialize(_ *v1beta1.BatchRelease) error { +func (mc *MinReadyControl) Initialize(release *v1beta1.BatchRelease) error { + if release == nil { + return fmt.Errorf("MinReadyControl.Initialize: release is nil") + } if err := mc.ensureInitializeAllowed(); err != nil { return fmt.Errorf("MinReadyControl.Initialize: %w", err) } @@ -64,6 +70,8 @@ func (mc *MinReadyControl) Initialize(_ *v1beta1.BatchRelease) error { return fmt.Errorf("MinReadyControl.Initialize: %w", err) } } + modified.Annotations[util.BatchReleaseControlAnnotation] = 
util.DumpJSON(metav1.NewControllerRef( + release, release.GetObjectKind().GroupVersionKind())) inflateDeploymentStrategy(modified) patch := client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{}) return mc.client.Patch(context.TODO(), modified, patch) @@ -114,6 +122,8 @@ func (mc *MinReadyControl) Finalize(_ *v1beta1.BatchRelease) error { for _, key := range AllOriginalAnnotations { delete(modified.Annotations, key) } + delete(modified.Annotations, util.BatchReleaseControlAnnotation) + delete(modified.Labels, v1alpha1.DeploymentStableRevisionLabel) patch := client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{}) return mc.client.Patch(context.TODO(), modified, patch) } diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go index c0661c4d..717f0a38 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go @@ -51,6 +51,7 @@ func TestMinReadyInitializeWritesOriginalAnnotationsAndInflatesFields(t *testing assertAnnotation(t, annotations, AnnotationOriginalProgressDeadlineSeconds, "60") assertAnnotation(t, annotations, AnnotationOriginalMaxUnavailable, "25%") assertAnnotation(t, annotations, AnnotationOriginalMaxSurge, "1") + assertAnnotation(t, annotations, util.BatchReleaseControlAnnotation, getControlInfo(releaseDemo)) } func TestMinReadyInitializeIsIdempotentAndDoesNotOverwriteAnnotations(t *testing.T) { diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_finalize_test.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_finalize_test.go index cab86a88..c846fbab 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_finalize_test.go +++ 
b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_finalize_test.go @@ -19,6 +19,9 @@ package deployment import ( "strings" "testing" + + appsv1alpha1 "github.com/openkruise/rollouts/api/v1alpha1" + "github.com/openkruise/rollouts/pkg/util" ) func TestMinReadyFinalizeRestoresOriginalValues(t *testing.T) { @@ -28,6 +31,10 @@ func TestMinReadyFinalizeRestoresOriginalValues(t *testing.T) { AnnotationOriginalProgressDeadlineSeconds: "60", AnnotationOriginalMaxUnavailable: "25%", AnnotationOriginalMaxSurge: "1", + util.BatchReleaseControlAnnotation: getControlInfo(releaseDemo), + } + deployment.Labels = map[string]string{ + appsv1alpha1.DeploymentStableRevisionLabel: "stable-revision", } control := newBuiltMinReadyControl(t, deployment) @@ -53,6 +60,12 @@ func TestMinReadyFinalizeRestoresOriginalValues(t *testing.T) { t.Fatalf("annotation %s still exists", key) } } + if _, ok := got.Annotations[util.BatchReleaseControlAnnotation]; ok { + t.Fatalf("annotation %s still exists", util.BatchReleaseControlAnnotation) + } + if _, ok := got.Labels[appsv1alpha1.DeploymentStableRevisionLabel]; ok { + t.Fatalf("label %s still exists", appsv1alpha1.DeploymentStableRevisionLabel) + } } func TestMinReadyFinalizeRestoresKubernetesDefaults(t *testing.T) { From 5c36a0b099ec1d82854a6eaf82f96d9d88a474c6 Mon Sep 17 00:00:00 2001 From: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> Date: Thu, 11 Jun 2026 19:51:42 +0800 Subject: [PATCH 05/22] fix: address MinReady review findings and repo audit issues MinReady (P0/P1/P2 from code review): - P0-2: webhook now enforces RollingUpdate/paused=false/non-nil rollingUpdate invariants for active MinReady rollouts; controller treats paused drift as drift so ensureInflatedDeploymentStrategy self-heals the freeze. - P1-1: introduce sentinel errors and classify degraded reasons via errors.Is instead of matching human-readable message text. 
- P1-2: UpgradeBatch converges maxUnavailable back to the batch target on scale-down instead of falsely reporting GitOps drift. - P1-4: lift MinReady annotation constants to api/v1beta1; executor keeps routing to MinReady controller (and status keeps reporting) when the feature gate is disabled mid-rollout but the Deployment still carries annotations. - P1-6: EnrollMinReadyDeployment inflates strategy synchronously at admission, closing the window where the native controller could observe the original budget before Initialize lands. - P0-1: add batchLabelSatisfied regression matrix (shared hot path). - P2-3: document the BlueGreen+Canary mutual-exclusion invariant source. Repo audit: - Restrict webhook cert dir to 0700 and private keys to 0600 (certs 0644). - parse_utils: use NestedString to avoid panic on malformed status; surface json marshal/unmarshal errors instead of silently swallowing them. - Align Dockerfile_multiarch Go builder to golang:1.20.14-alpine3.19. - Update proposal: webhook/executor behavior and add operator runbook (feature-gate lifecycle, disable preconditions, controller-death recovery). 
Co-Authored-By: Claude Opus 4.8 Signed-off-by: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> --- Dockerfile_multiarch | 4 +- api/v1beta1/deployment_types.go | 31 ++++ ...t-minready-seconds-progressive-delivery.md | 155 +++++++++++++----- .../batchrelease/batchrelease_executor.go | 23 ++- .../batchrelease/context/context_test.go | 60 +++++++ .../deployment/minready_constants.go | 36 ++-- .../deployment/minready_control.go | 69 ++++++-- .../deployment/minready_control_test.go | 121 ++++++++++++++ .../deployment/minready_test_helpers_test.go | 12 ++ .../control/partitionstyle/minready_errors.go | 37 +++++ .../control/partitionstyle/minready_status.go | 75 +++++---- .../partitionstyle/minready_status_test.go | 48 +++++- pkg/util/parse_utils.go | 56 +++++-- pkg/util/parse_utils_test.go | 51 ++++++ pkg/webhook/util/writer/fs.go | 20 ++- pkg/webhook/util/writer/fs_test.go | 77 +++++++++ .../mutating/workload_update_handler.go | 40 ++++- .../mutating/workload_update_handler_test.go | 115 ++++++++++++- 18 files changed, 889 insertions(+), 141 deletions(-) create mode 100644 pkg/controller/batchrelease/control/partitionstyle/minready_errors.go create mode 100644 pkg/webhook/util/writer/fs_test.go diff --git a/Dockerfile_multiarch b/Dockerfile_multiarch index f60efafb..dc58638b 100644 --- a/Dockerfile_multiarch +++ b/Dockerfile_multiarch @@ -1,7 +1,7 @@ # Build the manager binary ARG BASE_IMAGE=alpine -ARG BASE_IMAGE_VERION=3.17 -FROM --platform=$BUILDPLATFORM golang:1.19-alpine3.17 AS builder +ARG BASE_IMAGE_VERION=3.19 +FROM --platform=$BUILDPLATFORM golang:1.20.14-alpine3.19 AS builder WORKDIR /workspace diff --git a/api/v1beta1/deployment_types.go b/api/v1beta1/deployment_types.go index 5002fd82..49884f07 100644 --- a/api/v1beta1/deployment_types.go +++ b/api/v1beta1/deployment_types.go @@ -47,8 +47,39 @@ const ( // MaxInt32: 2147483647, ≈ 68 years MaxProgressSeconds = 1<<31 - 1 MaxReadySeconds = MaxProgressSeconds - 1 + + // MinReadyOriginal*Annotation 
snapshot the user-specified Deployment strategy + // fields before the MinReadySeconds strategy inflates them; they are used to + // restore the Deployment on finalize. A Deployment carrying any of them is + // (still) managed by the MinReady controller, even if the feature gate has + // been turned off mid-rollout. + MinReadyOriginalMinReadySecondsAnnotation = "rollouts.kruise.io/original-min-ready-seconds" + MinReadyOriginalProgressDeadlineSecondsAnnotation = "rollouts.kruise.io/original-progress-deadline-seconds" + MinReadyOriginalMaxUnavailableAnnotation = "rollouts.kruise.io/original-max-unavailable" + MinReadyOriginalMaxSurgeAnnotation = "rollouts.kruise.io/original-max-surge" ) +// MinReadyOriginalAnnotations lists all annotations that snapshot the original +// Deployment strategy fields for the MinReadySeconds strategy. +var MinReadyOriginalAnnotations = []string{ + MinReadyOriginalMinReadySecondsAnnotation, + MinReadyOriginalProgressDeadlineSecondsAnnotation, + MinReadyOriginalMaxUnavailableAnnotation, + MinReadyOriginalMaxSurgeAnnotation, +} + +// HasMinReadyOriginalAnnotations returns true if the annotations carry any +// MinReady original-strategy snapshot, i.e. the workload was initialized by +// the MinReady controller and has not been finalized yet. +func HasMinReadyOriginalAnnotations(annotations map[string]string) bool { + for _, key := range MinReadyOriginalAnnotations { + if _, ok := annotations[key]; ok { + return true + } + } + return false +} + // DeploymentStrategy is strategy field for Advanced Deployment type DeploymentStrategy struct { // RollingStyle define the behavior of rolling for deployment. 
diff --git a/docs/proposals/20260315-deployment-minready-seconds-progressive-delivery.md b/docs/proposals/20260315-deployment-minready-seconds-progressive-delivery.md index 1e37afa2..aa126df5 100644 --- a/docs/proposals/20260315-deployment-minready-seconds-progressive-delivery.md +++ b/docs/proposals/20260315-deployment-minready-seconds-progressive-delivery.md @@ -7,7 +7,7 @@ reviewers: - "@AiRanthem" - "@zmberg" creation-date: 2026-05-23 -last-updated: 2026-06-02 +last-updated: 2026-06-07 status: implementable --- @@ -31,7 +31,7 @@ status: implementable - [API Compatibility](#api-compatibility) - [Annotation Schema](#annotation-schema) - [Field Inflation Values](#field-inflation-values) - - [Optional maxSurge Module](#optional-maxsurge-module) + - [maxSurge Preservation](#maxsurge-preservation) - [Controller Implementation](#controller-implementation) - [Initialization Process](#initialization-process) - [Batch Upgrade Process](#batch-upgrade-process) @@ -133,7 +133,7 @@ graph TB WH["Workload Update Webhook
feature-gated Recreate skip
preserves RollingUpdate"] EX["BatchRelease Executor
feature-gated controller selection"] MRC["MinReadyControl
(embeds *realController)
Initialize / UpgradeBatch /
CalculateBatchContext / Finalize"] - MS["Optional maxSurge Module
preserve, limit, or disable
behind internal switch"] + MS["maxSurge Handling
preserve original value
validation fallback only"] end subgraph "Kubernetes Native Control Plane" @@ -154,7 +154,7 @@ graph TB EX -->|"6. Route to MinReadyControl"| MRC MRC -->|"7. Initialize:
save original fields
inflate minReadySeconds
set maxUnavailable=0"| API - MRC -.->|"optional surge behavior"| MS + MRC -.->|"preserve maxSurge
or validation fallback"| MS API -->|"persists fields + annotations"| DEP MRC -->|"8. UpgradeBatch:
increase maxUnavailable
by batch size"| API @@ -252,31 +252,20 @@ During `Initialize`, the core MinReadySeconds path inflates three Deployment fie **Why `minReadySeconds` is one less than `progressDeadlineSeconds`**: Kubernetes Deployment validation requires `minReadySeconds < progressDeadlineSeconds`. Setting both to `MaxInt32` would cause the Deployment to fail validation. The existing constant `MaxReadySeconds = MaxProgressSeconds - 1` (defined in `api/v1beta1/deployment_types.go`) is reused. -`maxSurge` is deliberately not part of the core field-inflation contract. It is handled by a separate policy module so maintainers can enable full surge support, use a conservative alpha policy, or temporarily disable the module without changing the MinReadySeconds rollout algorithm. +`maxSurge` is not a new user-facing policy or a MinReadySeconds rollout knob. It is an existing Kubernetes RollingUpdate field, and the MinReadySeconds path preserves the user's original value by default. -#### Optional maxSurge Module +#### maxSurge Preservation -Native Deployment RollingUpdate supports surge capacity, and this proposal should not require `maxSurge=1` as a semantic constraint. The implementation isolates surge handling behind an internal policy boundary: +Native Deployment RollingUpdate supports surge capacity, and this proposal should not require `maxSurge=1` as a semantic constraint. The implementation follows one internal rule: -```go -type surgePolicy interface { - Initialize(deployment *appsv1.Deployment, original intstr.IntOrString) error - Ensure(deployment *appsv1.Deployment, original intstr.IntOrString) error - Restore(deployment *appsv1.Deployment, original intstr.IntOrString) error -} -``` - -Supported policy choices: - -| Policy | Alpha status | Behavior | -|---|---|---| -| `PreserveSurgePolicy` | Preferred if accepted | Preserve the user's original `maxSurge`. 
Surge-created updated pods are allowed, but they are counted as batch-complete only after satisfying the original `minReadySeconds`. | -| `ConservativeSurgePolicy` | Fallback | Save and restore the user's original `maxSurge`, but use a small live value during rollout to reduce temporary capacity pressure. This is an implementation fallback, not a user-visible API guarantee. | -| `DisabledSurgePolicy` | Escape hatch | Reject or degrade workloads whose `maxSurge` requires unsupported behavior. This keeps the maxSurge module removable from alpha without touching the core MinReadySeconds controller. | +1. Store the original `maxSurge` value in `rollouts.kruise.io/original-max-surge`. +2. Preserve the live `maxSurge` value during rollout. +3. If preserving it would make the live RollingUpdate strategy invalid because `maxUnavailable=0` and effective `maxSurge=0`, temporarily use `maxSurge=1` as a validation fallback. +4. Restore the original `maxSurge` value during `Finalize`. -The batch-ready calculation is the same under all policies: count updated pods only after they are `Ready` and have remained ready for the user's original `minReadySeconds`. Therefore, preserving a larger `maxSurge` can increase temporary pod count, but it cannot mark a batch successful early. +The batch-ready calculation is independent from `maxSurge`: count updated pods only after they are `Ready` and have remained ready for the user's original `minReadySeconds`. Therefore, preserving a larger `maxSurge` can increase temporary pod count, but it cannot mark a batch successful early. -Any policy must also preserve Kubernetes RollingUpdate validation rules. In particular, the live strategy must not set both `maxUnavailable=0` and `maxSurge=0`. If the maxSurge module is disabled for alpha, the controller should reject unsupported surge configurations or keep a minimal valid live surge value rather than writing an invalid Deployment strategy. +This fallback is not a user-visible policy choice. 
It exists only to satisfy Kubernetes RollingUpdate validation rules while keeping the default behavior as "preserve the user's original `maxSurge`". #### Controller Implementation @@ -288,7 +277,7 @@ type MinReadyControl struct { } func NewMinReadyController(cli client.Client, key types.NamespacedName, gvk schema.GroupVersionKind) partitionstyle.Interface { - return &MinReadyControl{realController: NewController(cli, key, gvk).(*realController)} + return &MinReadyControl{realController: newRealController(cli, key)} } ``` @@ -296,7 +285,7 @@ func NewMinReadyController(cli client.Client, key types.NamespacedName, gvk sche |---|---| | `GetWorkloadInfo` | Inherited (no change). | | `ListOwnedPods` | Inherited (no change). | -| `BuildController` | Inherited (no change). | +| `BuildController` | **Wrapped** — builds the embedded real controller and returns a `MinReadyControl`. | | `Initialize` | **Overridden** — see [Initialization Process](#initialization-process). | | `UpgradeBatch` | **Overridden** — see [Batch Upgrade Process](#batch-upgrade-process). | | `CalculateBatchContext` | **Overridden** — see [Batch Context Calculation](#batch-context-calculation). | @@ -309,13 +298,13 @@ func NewMinReadyController(cli client.Client, key types.NamespacedName, gvk sche 1. **Eligibility check** (`ensureInitializeAllowed`): - The `MinReadySecondsStrategy` feature gate must be enabled. Otherwise return error → `MinReadyDegraded`. - The Deployment must use `RollingUpdate`. `Recreate` workloads continue to use the existing path. - - PDB presence is not a hard rejection. PDBs are detected for observability only because they protect Eviction API flows, not Deployment rolling updates. + - PDB presence is not an eligibility failure. PDBs protect Eviction API flows, not Deployment rolling updates, so they are not used as the batch-safety mechanism. 2. 
**Annotation persistence** (`writeOriginalAnnotations`): - If any of the four annotations is already present, validate that all four exist (idempotency check) and that the on-disk fields are already inflated. If consistent, no-op. - Otherwise, serialize the current values of `minReadySeconds`, `progressDeadlineSeconds`, `maxUnavailable`, `maxSurge` per the serialization rules above and write all four annotations. -3. **Field inflation** (`inflateDeploymentStrategy`): Set `minReadySeconds`, `progressDeadlineSeconds`, and `maxUnavailable` to their MinReadySeconds values. Apply the configured `maxSurge` policy module if enabled. +3. **Field inflation** (`inflateDeploymentStrategy`): Set `minReadySeconds`, `progressDeadlineSeconds`, and `maxUnavailable` to their MinReadySeconds values. Preserve `maxSurge` unless a `maxUnavailable=0 && maxSurge=0` combination would violate Kubernetes RollingUpdate validation; in that case, temporarily use `maxSurge=1`. 4. **Atomic commit**: Issue a single `Patch` using `client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{})`. The annotations and field changes are committed together; the Kubernetes API server's resource-level PATCH atomicity guarantees no partial state is observable. @@ -323,7 +312,7 @@ func NewMinReadyController(cli client.Client, key types.NamespacedName, gvk sche `UpgradeBatch(ctx)` is invoked per batch by the BatchRelease executor. It performs: -1. **Inflation invariant** (`ensureInflatedDeploymentStrategy`): Verify and, if necessary, patch the Deployment so `minReadySeconds == MaxReadySeconds` and `progressDeadlineSeconds == MaxProgressSeconds` before each batch operation. The active `maxSurge` policy performs its own `Ensure` step. This makes the inflated fields a rollout-long invariant rather than a one-time initialization side effect. +1. 
**Inflation invariant** (`ensureInflatedDeploymentStrategy`): Verify and, if necessary, patch the Deployment so `minReadySeconds == MaxReadySeconds` and `progressDeadlineSeconds == MaxProgressSeconds` before each batch operation. `maxSurge` remains preserved except for the validation fallback described above. This makes the inflated fields a rollout-long invariant rather than a one-time initialization side effect. 2. **Target computation**: Read the current `maxUnavailable` and compare against `ctx.DesiredUpdatedReplicas`. - If `current > target`: external write has increased `maxUnavailable` beyond the batch target → `MinReadyDegraded`. @@ -422,33 +411,66 @@ func shouldSkipRecreateMutationForMinReady(rollout *appsv1beta1.Rollout) bool { } ``` -The guard is invoked at the top of the existing Recreate mutation logic: +The guard splits the mutation into two paths. `shouldSkipRecreateMutationForMinReady` only checks `Canary` because a Rollout cannot declare both `BlueGreen` and `Canary` — the validating webhook rejects that combination — so with `BlueGreen==nil` guaranteed, the guard is equivalent to the executor's `GetRollingStyle()==Partition` routing and both sides agree on MinReady. + +**Enrollment path (workload entering progressing).** Instead of pausing the Deployment, the webhook synchronously snapshots the original strategy fields into annotations and inflates `minReadySeconds` / `progressDeadlineSeconds` / `maxUnavailable` in place via `EnrollMinReadyDeployment`: ```go if shouldSkipRecreateMutationForMinReady(rollout) { - return false, nil // do not mutate; preserve the user's original strategy + // MinReady keeps the native controller running, so it must NOT be paused. + // Inflate synchronously at admission time so the native controller never + // observes the user's original budget in the window between admission and + // MinReadyControl.Initialize. Initialize stays the fallback and validates + // (instead of rewriting) annotations that already exist. 
+ if err := partitiondeployment.EnrollMinReadyDeployment(newObj); err != nil { + klog.Warningf("Skip MinReady enrollment for Deployment(%s/%s): %v", ...) + } +} else { + newObj.Spec.Paused = true // Partition/Recreate style disables the native controller } -// ... existing Recreate mutation logic unchanged ... ``` -When the feature gate is disabled, the existing behavior is preserved exactly. +Enrolling at admission time closes the race window that would otherwise exist between "new revision admitted" and "`Initialize` patch lands", during which the native controller could replace pods using the user's original budget before batch 0 takes effect. Enrollment does not block admission: an unsupported strategy (e.g. `Recreate`) only logs a warning, and `MinReadyControl.Initialize` surfaces a degraded condition instead. + +**Progressing path (re-admission of an active rollout).** For a Deployment already in progressing state, `enforceMinReadyInflation` re-asserts the full set of invariants the strategy depends on, rewriting unsafe external edits back to safe values before they reach storage: + +- `spec.strategy.type` forced back to `RollingUpdate` (a `Recreate` write is rejected); +- `spec.paused` forced back to `false` (a paused Deployment would silently freeze the native controller); +- `spec.strategy.rollingUpdate` ensured non-nil; +- `minReadySeconds` / `progressDeadlineSeconds` re-inflated if lowered or cleared. -The webhook also enforces the inflation invariant for active MinReadySeconds rollouts. If an external writer lowers or clears `minReadySeconds` or `progressDeadlineSeconds` while the Deployment still carries the original-value annotations, the webhook rewrites the update back to `MaxReadySeconds` and `MaxProgressSeconds`. This complements the reconcile-time `ensureInflatedDeploymentStrategy` check and prevents a short window where the native Deployment controller could observe restored values before the rollout has finalized. 
+This complements the reconcile-time `ensureInflatedDeploymentStrategy` check: the webhook blocks dangerous spec at admission, while the controller self-heals any drift that slips through (e.g. a direct etcd write or a GitOps reconcile between admissions). #### Strategy Selection -The BatchRelease executor routes to the MinReadySeconds controller based on the feature gate and the existing rollout shape: +The BatchRelease executor routes to the MinReadySeconds controller when the +feature gate is enabled, **or** when the target Deployment still carries the +MinReady original-value annotations: ```go -if utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) && - isNativeDeployment(release) && - isPartitionStyleCanary(release) { +if utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) || + r.deploymentHasMinReadyAnnotations(targetKey) { return partitionstyle.NewControlPlane(partitiondeployment.NewMinReadyController, ...) } return partitionstyle.NewControlPlane(partitiondeployment.NewController, ...) ``` -No strategy value is copied through `ReleasePlan`; disabled feature gates and unsupported workload shapes fall through to the existing `NewController`, preserving the current Recreate-mode behavior. +The annotation clause is the important one for **gate lifecycle safety**. A +Deployment that was already enrolled (RollingUpdate strategy, `paused=false`, +inflated fields, four original annotations) is **not** recognized as under +control by the legacy Recreate-mode controller, whose ownership check requires +`strategy.type=Recreate && paused=true`. If the gate were turned off mid-rollout +and routing fell back to the legacy controller, the workload would be stranded: +`UpgradeBatch` skipped, `Finalize` a no-op, inflated fields and annotations left +behind. Keeping MinReady control whenever the annotations are present lets an +in-flight rollout finalize cleanly and restore the user's original strategy even +after the gate is disabled. 
The `isMinReadyRelease` status helper is widened the +same way, so degraded conditions are not silently suppressed once the gate flips +off. + +No strategy value is copied through `ReleasePlan`; a disabled gate with no +MinReady annotations, and unsupported workload shapes, fall through to the +existing `NewController`, preserving the current Recreate-mode behavior. ### Risks and Mitigations @@ -460,7 +482,7 @@ Therefore, a PDB may coexist with this rollout, but it cannot enforce the MinRea **Mitigation**: - Do not reject a Deployment rollout only because a PDB selector covers its pods. -- Emit an informational event when a matching PDB exists so operators understand that eviction budget and rollout batch readiness are separate controls. +- Document PDB coexistence clearly so operators treat eviction budget and rollout batch readiness as separate controls. - Calculate batch readiness inside `MinReadyControl` using updated revision plus original `minReadySeconds`; never rely on PDB status as the rollout readiness signal. --- @@ -494,11 +516,60 @@ The worst-case failure mode of this strategy. - **This is the intended worst case** and the core safety guarantee. Unlike Recreate mode, which can result in a service outage from en-masse pod recreation, this strategy degrades gracefully: pods continue serving traffic in their current state. - A `MinReadyDegraded` condition is set, allowing observability systems to alert on the condition. The user can manually run `kubectl patch` to restore fields once the underlying issue is resolved. +### Operator Runbook (alpha) + +This section is the operational contract for the alpha feature gate. It captures the failure modes an operator must understand before enabling `MinReadySecondsStrategy` in a cluster. + +#### Feature gate lifecycle + +The `MinReadySecondsStrategy` gate is **cluster-scoped** (it lives on the kruise-rollout controller, not on individual Rollout resources). 
Enabling it changes the control mode for **every** native-Deployment partition-style rollout in the cluster, not a selected subset. Per-rollout opt-in is deferred to beta. + +- **Enabling**: turn the gate on before starting a rollout. Newly progressing Deployments are enrolled (strategy inflated) at admission time. +- **Disabling — preconditions**: a Deployment that is mid-rollout under MinReady control carries the four `rollouts.kruise.io/original-*` annotations and has inflated `minReadySeconds`/`progressDeadlineSeconds`. The old Recreate-mode controller does **not** recognize such a Deployment as under its control (it keys on `strategy.type=Recreate && paused=true`). To avoid stranding a workload in a half-initialized inflated state, **finish or cancel all in-flight MinReady rollouts before disabling the gate.** +- **Disabling — safety net**: if the gate is turned off mid-rollout anyway, the executor still routes a Deployment that carries the MinReady original annotations to the MinReady controller (it does not look only at the gate). This lets the rollout finalize and restore the original fields. Once finalized (annotations removed), routing falls back to the default controller. Verify cleanup with `kubectl get deploy -o jsonpath='{.metadata.annotations}'` — no `rollouts.kruise.io/original-*` keys should remain. + +#### `progressDeadlineSeconds` inflation disables the native stuck-detector + +To stop the native Deployment controller from declaring `ProgressDeadlineExceeded` while a batch intentionally waits, `progressDeadlineSeconds` is inflated to `MaxProgressSeconds` (≈68 years). This is deliberate, but it means the **native progress-deadline safety net is off** for the duration of the rollout. The rollout's own stuck-time gauge (`MinReadyStuckSeconds`) replaces it. Operators must alert on that gauge / on the `MinReadyDegraded` condition rather than expecting the native controller to surface a stuck rollout. 
+ +#### Worst case: controller dies while a workload is frozen + +If the kruise-rollout controller becomes **permanently unavailable** (crash-loop, deleted deployment, broken leader election) while a Deployment is parked mid-batch, the workload freezes silently: + +- New pods stay `Ready-but-not-Available` because `minReadySeconds` is inflated; the batch never advances. +- The native progress-deadline net is disabled (see above), so the native controller will not report the freeze either. +- The only symptom is the `MinReadyStuckSeconds` gauge climbing — which requires the controller (or an external watchdog) to be alive to emit it. + +This is an accepted alpha limitation. Manual recovery without the controller: + +1. Identify the Deployment and read its saved originals: + ```bash + kubectl get deploy <name> -n <namespace> \ + -o jsonpath='{.metadata.annotations.rollouts\.kruise\.io/original-min-ready-seconds}{"\n"}{.metadata.annotations.rollouts\.kruise\.io/original-max-unavailable}{"\n"}' + ``` + A value of `__k8s_default__` means the field was unset originally (restore by removing it). +2. Restore the original strategy fields and clear the rollout control annotation: + ```bash + kubectl patch deploy <name> -n <namespace> --type merge -p '{ + "spec": {"minReadySeconds": <original-min-ready-seconds>, "progressDeadlineSeconds": <original-progress-deadline-seconds>, + "strategy": {"rollingUpdate": {"maxUnavailable": <original-max-unavailable>}}}, + "metadata": {"annotations": { + "rollouts.kruise.io/original-min-ready-seconds": null, + "rollouts.kruise.io/original-progress-deadline-seconds": null, + "rollouts.kruise.io/original-max-unavailable": null, + "rollouts.kruise.io/original-max-surge": null, + "rollouts.kruise.io/batch-release-control": null }}}' + ``` + The native Deployment controller then resumes a normal rolling update to completion. +3. Before re-enabling the gate, confirm no Deployment retains `rollouts.kruise.io/original-*` annotations. + +A production-grade watchdog that alerts on "controller liveness lost while a MinReady rollout is in flight" is tracked as beta Future Work. 
+ ## Alternatives 1. **Continue mutating `spec.strategy.type` to `Recreate`** (current implementation): Rejected because the destructive nature of strategy mutation cannot be atomically reversed under failure. Multiple production incidents ([#305](https://github.com/openkruise/rollouts/issues/305)) demonstrate the risk. -2. **Custom `ReadinessGate` to gate pod availability**: A future direction (Plan B) that would allow PDB coexistence. Rejected for the alpha phase because it requires a custom mutating webhook to inject the gate, a separate controller to manage the gate condition, and significant additional testing surface. Tracked as Future Work for beta. +2. **Custom `ReadinessGate` to gate pod availability**: A future direction (Plan B) that would let PDB/disruption-controller visibility participate in rollout gating. Rejected for the alpha phase because it requires a custom mutating webhook to inject the gate, a separate controller to manage the gate condition, and significant additional testing surface. Tracked as Future Work for beta. 3. **Use `paused=true` plus partition annotations** (similar to CloneSet): Rejected because the native Deployment controller does not honor a partition mechanism. Implementing partition-style for native Deployment would require either re-implementing the rolling update loop or relying on `Recreate`, returning to the original problem. @@ -529,6 +600,6 @@ Users opt in by enabling the feature gate on the kruise-rollout controller. 
- [ ] Q2 2026 (GSoC weeks 1–6): MinReadyControl core implementation (Initialize / UpgradeBatch / CalculateBatchContext / Finalize) with unit tests - [ ] Q3 2026 (GSoC weeks 7–8): Webhook invariant enforcement and feature-gated strategy selection - [ ] Q3 2026 (GSoC weeks 9–10): End-to-end tests covering the five core scenarios -- [ ] Q3 2026 (GSoC weeks 11–12): PDB coexistence, maxSurge policy hardening, edge cases, documentation +- [ ] Q3 2026 (GSoC weeks 11–12): PDB coexistence, maxSurge preservation edge cases, documentation - [ ] TBD: Observability follow-up (status conditions, events, Prometheus metrics) - [ ] TBD: Plan B (custom `ReadinessGate`) if future requirements need PDB-aware workload availability semantics diff --git a/pkg/controller/batchrelease/batchrelease_executor.go b/pkg/controller/batchrelease/batchrelease_executor.go index 0f20dacd..e934c6c0 100644 --- a/pkg/controller/batchrelease/batchrelease_executor.go +++ b/pkg/controller/batchrelease/batchrelease_executor.go @@ -17,6 +17,7 @@ limitations under the License. package batchrelease import ( + "context" "fmt" "reflect" "time" @@ -246,7 +247,14 @@ func (r *Executor) getReleaseController(release *v1beta1.BatchRelease, newStatus return partitionstyle.NewControlPlane(cloneset.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil } if targetRef.APIVersion == apps.SchemeGroupVersion.String() && targetRef.Kind == reflect.TypeOf(apps.Deployment{}).Name() { - if utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) { + // Route to the MinReady controller when the feature gate is enabled, or + // when the Deployment still carries MinReady original-strategy annotations. + // The latter covers the gate being turned off mid-rollout: the old + // Recreate-mode controller would not recognize an inflated RollingUpdate + // Deployment as under its control, leaving the workload stuck in a + // half-initialized state. 
Keeping MinReady control lets it finalize and + // restore the original fields. + if utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) || r.deploymentHasMinReadyAnnotations(targetKey) { klog.InfoS("Using Deployment MinReadySeconds partition-style release controller for this batch release", "workload name", targetKey.Name, "namespace", targetKey.Namespace) return partitionstyle.NewControlPlane(partitiondeployment.NewMinReadyController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil } @@ -261,6 +269,19 @@ func (r *Executor) getReleaseController(release *v1beta1.BatchRelease, newStatus return partitionstyle.NewControlPlane(statefulset.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil } +// deploymentHasMinReadyAnnotations reports whether the target Deployment still +// carries MinReady original-strategy annotations, i.e. it was initialized by the +// MinReady controller and not yet finalized. Used to keep MinReady routing when +// the feature gate is disabled mid-rollout. A fetch failure (e.g. NotFound) +// returns false so routing falls back to the default controller. 
+func (r *Executor) deploymentHasMinReadyAnnotations(key types.NamespacedName) bool { + deployment := &apps.Deployment{} + if err := r.client.Get(context.TODO(), key, deployment); err != nil { + return false + } + return v1beta1.HasMinReadyOriginalAnnotations(deployment.Annotations) +} + func (r *Executor) moveToNextBatch(release *v1beta1.BatchRelease, status *v1beta1.BatchReleaseStatus) { currentBatch := int(status.CanaryStatus.CurrentBatch) if currentBatch >= len(release.Spec.ReleasePlan.Batches)-1 { diff --git a/pkg/controller/batchrelease/context/context_test.go b/pkg/controller/batchrelease/context/context_test.go index e1515c20..704618e9 100644 --- a/pkg/controller/batchrelease/context/context_test.go +++ b/pkg/controller/batchrelease/context/context_test.go @@ -190,3 +190,63 @@ func generatePodsWith(labels map[string]string, replicas int, beginOrder int) [] } return pods } + +// TestBatchLabelSatisfied is a regression matrix for the shared batchLabelSatisfied +// helper (P0-1). It is on the hot path for every partition-style control plane +// (CloneSet, StatefulSet, Advanced DaemonSet, Advanced Deployment, MinReady), so +// the empty-pod-list semantics must hold regardless of feature gate. The key +// change being locked down: rolloutID set AND targetCount > 0 AND no pods listed +// must return false (the batch label cannot be satisfied by zero pods), instead +// of the previous true. 
+func TestBatchLabelSatisfied(t *testing.T) { + labeledPods := generatePodsWith(map[string]string{ + v1beta1.RolloutIDLabel: "rollout-1", + }, 3, 0) + + cases := map[string]struct { + pods []*corev1.Pod + rolloutID string + targetCount int32 + want bool + }{ + "empty rolloutID short-circuits to true, no pods": { + pods: nil, rolloutID: "", targetCount: 5, want: true, + }, + "empty rolloutID short-circuits to true, with pods": { + pods: labeledPods, rolloutID: "", targetCount: 5, want: true, + }, + "targetCount zero short-circuits to true, no pods": { + pods: nil, rolloutID: "rollout-1", targetCount: 0, want: true, + }, + "targetCount negative short-circuits to true": { + pods: nil, rolloutID: "rollout-1", targetCount: -1, want: true, + }, + "rolloutID set, target>0, no pods listed -> false (P0-1 core change)": { + pods: nil, rolloutID: "rollout-1", targetCount: 3, want: false, + }, + "rolloutID set, target>0, empty (non-nil) pod slice -> false": { + pods: []*corev1.Pod{}, rolloutID: "rollout-1", targetCount: 3, want: false, + }, + "rolloutID set, enough labeled pods -> true": { + pods: labeledPods, rolloutID: "rollout-1", targetCount: 3, want: true, + }, + "rolloutID set, not enough labeled pods -> false": { + pods: labeledPods, rolloutID: "rollout-1", targetCount: 4, want: false, + }, + "rolloutID set, pods labeled with a different id -> false": { + pods: generatePodsWith(map[string]string{v1beta1.RolloutIDLabel: "rollout-2"}, 3, 0), + rolloutID: "rollout-1", + targetCount: 1, + want: false, + }, + } + + for name, cs := range cases { + t.Run(name, func(t *testing.T) { + if got := batchLabelSatisfied(cs.pods, cs.rolloutID, cs.targetCount); got != cs.want { + t.Fatalf("batchLabelSatisfied(pods=%d, id=%q, target=%d) = %v, want %v", + len(cs.pods), cs.rolloutID, cs.targetCount, got, cs.want) + } + }) + } +} diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_constants.go 
b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_constants.go index 41783338..5742f014 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_constants.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_constants.go @@ -24,13 +24,17 @@ import ( "k8s.io/apimachinery/pkg/util/intstr" "github.com/openkruise/rollouts/api/v1beta1" + "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle" ) const ( - AnnotationOriginalMinReadySeconds = "rollouts.kruise.io/original-min-ready-seconds" - AnnotationOriginalProgressDeadlineSeconds = "rollouts.kruise.io/original-progress-deadline-seconds" - AnnotationOriginalMaxUnavailable = "rollouts.kruise.io/original-max-unavailable" - AnnotationOriginalMaxSurge = "rollouts.kruise.io/original-max-surge" + // Aliases kept for readability inside this package; the canonical + // definitions live in api/v1beta1 so that packages which cannot import + // this one (e.g. partitionstyle) can still recognize MinReady state. 
+ AnnotationOriginalMinReadySeconds = v1beta1.MinReadyOriginalMinReadySecondsAnnotation + AnnotationOriginalProgressDeadlineSeconds = v1beta1.MinReadyOriginalProgressDeadlineSecondsAnnotation + AnnotationOriginalMaxUnavailable = v1beta1.MinReadyOriginalMaxUnavailableAnnotation + AnnotationOriginalMaxSurge = v1beta1.MinReadyOriginalMaxSurgeAnnotation AnnotationValueKubernetesDefault = "__k8s_default__" @@ -39,12 +43,7 @@ const ( InflatedMaxSurgeInt int32 = 1 ) -var AllOriginalAnnotations = []string{ - AnnotationOriginalMinReadySeconds, - AnnotationOriginalProgressDeadlineSeconds, - AnnotationOriginalMaxUnavailable, - AnnotationOriginalMaxSurge, -} +var AllOriginalAnnotations = v1beta1.MinReadyOriginalAnnotations func serializeOriginalInt32(value *int32) string { if value == nil { @@ -70,7 +69,7 @@ func parseOriginalInt32(annotations map[string]string, key string) (*int32, erro } n, err := strconv.ParseInt(raw, 10, 32) if err != nil { - return nil, fmt.Errorf("annotation %s malformed int32: %w", key, err) + return nil, fmt.Errorf("annotation %s malformed int32: %v: %w", key, err, partitionstyle.ErrMinReadyAnnotationInvalid) } v := int32(n) return &v, nil @@ -83,14 +82,14 @@ func parseOriginalIntOrString(annotations map[string]string, key string) (*intst } if strings.HasSuffix(raw, "%") { if _, err := strconv.Atoi(strings.TrimSuffix(raw, "%")); err != nil { - return nil, fmt.Errorf("annotation %s malformed percent: %w", key, err) + return nil, fmt.Errorf("annotation %s malformed percent: %v: %w", key, err, partitionstyle.ErrMinReadyAnnotationInvalid) } v := intstr.FromString(raw) return &v, nil } n, err := strconv.Atoi(raw) if err != nil { - return nil, fmt.Errorf("annotation %s malformed int: %w", key, err) + return nil, fmt.Errorf("annotation %s malformed int: %v: %w", key, err, partitionstyle.ErrMinReadyAnnotationInvalid) } v := intstr.FromInt(n) return &v, nil @@ -99,19 +98,14 @@ func parseOriginalIntOrString(annotations map[string]string, key string) (*intst func 
readOriginalAnnotation(annotations map[string]string, key string) (string, error) { raw, ok := annotations[key] if !ok { - return "", fmt.Errorf("annotation %s missing", key) + return "", fmt.Errorf("annotation %s missing: %w", key, partitionstyle.ErrMinReadyAnnotationInvalid) } if raw == "" { - return "", fmt.Errorf("annotation %s present but empty", key) + return "", fmt.Errorf("annotation %s present but empty: %w", key, partitionstyle.ErrMinReadyAnnotationInvalid) } return raw, nil } func hasAnyOriginalAnnotation(annotations map[string]string) bool { - for _, key := range AllOriginalAnnotations { - if _, ok := annotations[key]; ok { - return true - } - } - return false + return v1beta1.HasMinReadyOriginalAnnotations(annotations) } diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go index acde95d8..b07a3ca2 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go @@ -23,6 +23,7 @@ import ( apps "k8s.io/api/apps/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/klog/v2" "sigs.k8s.io/controller-runtime/pkg/client" "github.com/openkruise/rollouts/api/v1alpha1" @@ -87,13 +88,16 @@ func (mc *MinReadyControl) UpgradeBatch(ctx *batchcontext.BatchContext) error { return fmt.Errorf("MinReadyControl.UpgradeBatch[%d]: %w", ctx.CurrentBatch, err) } target := ctx.DesiredUpdatedReplicas - if int32(current) > target { - return fmt.Errorf("MinReadyControl.UpgradeBatch[%d]: %s: maxUnavailable=%d exceeds target=%d", - ctx.CurrentBatch, EventDegradedDriftDetected, current, target) - } - if int32(current) >= target { + if int32(current) == target { return nil } + if int32(current) > target { + // maxUnavailable above the batch target is a legal state after a + // scale-down (HPA or 
manual) and also self-heals external tampering; + // converge it back to the target instead of reporting degraded drift. + klog.Warningf("MinReadyControl.UpgradeBatch[%d]: deployment %v maxUnavailable=%d exceeds target=%d, reducing it to the target", + ctx.CurrentBatch, klog.KObj(mc.object), current, target) + } original := mc.object.DeepCopy() modified := original.DeepCopy() maxUnavailable := intstr.FromInt(int(target)) @@ -108,7 +112,8 @@ func (mc *MinReadyControl) Finalize(_ *v1beta1.BatchRelease) error { } if !hasAnyOriginalAnnotation(mc.object.Annotations) { if hasInflatedDeploymentFields(mc.object) { - return fmt.Errorf("MinReadyControl.Finalize: annotation state missing while deployment fields are still inflated") + return fmt.Errorf("MinReadyControl.Finalize: annotation state missing while deployment fields are still inflated: %w", + partitionstyle.ErrMinReadyAnnotationInvalid) } return nil } @@ -166,7 +171,7 @@ func (mc *MinReadyControl) ensureInitializeAllowed() error { return fmt.Errorf("deployment is not loaded") } if !utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) { - return fmt.Errorf("%s feature gate is disabled", feature.MinReadySecondsStrategy) + return fmt.Errorf("%s %w", feature.MinReadySecondsStrategy, partitionstyle.ErrMinReadyFeatureGateDisabled) } if err := validateDeploymentStrategyType(mc.object); err != nil { return err @@ -215,6 +220,9 @@ func originalMaxSurge(deployment *apps.Deployment) *intstr.IntOrString { func inflateDeploymentStrategy(deployment *apps.Deployment) { progressDeadlineSeconds := InflatedProgressDeadlineSeconds maxUnavailable := intstr.FromInt(0) + // MinReady keeps the native controller running; a paused Deployment would + // freeze silently, so pausing is always reverted together with inflation. 
+ deployment.Spec.Paused = false deployment.Spec.MinReadySeconds = InflatedMinReadySeconds deployment.Spec.ProgressDeadlineSeconds = &progressDeadlineSeconds if deployment.Spec.Strategy.RollingUpdate == nil { @@ -224,6 +232,29 @@ func inflateDeploymentStrategy(deployment *apps.Deployment) { applyMaxSurgeValidationFallback(deployment) } +// EnrollMinReadyDeployment snapshots the original strategy fields into +// annotations and inflates them in place. The workload mutating webhook calls +// it when a Deployment enters rollout progressing, so the native controller +// never observes the original maxUnavailable/minReadySeconds budget between +// admission and MinReadyControl.Initialize; Initialize stays the fallback and +// validates (instead of rewriting) annotations that already exist. +func EnrollMinReadyDeployment(deployment *apps.Deployment) error { + if err := validateDeploymentStrategyType(deployment); err != nil { + return err + } + snapshot := deployment.DeepCopy() + if err := writeOriginalAnnotations(snapshot, deployment); err != nil { + return err + } + if hasAnyOriginalAnnotation(snapshot.Annotations) { + if err := validateInflatedDeploymentStrategy(snapshot); err != nil { + return err + } + } + inflateDeploymentStrategy(deployment) + return nil +} + func (mc *MinReadyControl) ensureInflatedDeploymentStrategy() error { if err := validateDeploymentStrategyType(mc.object); err != nil { return err @@ -246,24 +277,29 @@ func validateInflatedDeploymentStrategy(deployment *apps.Deployment) error { if err := validateDeploymentStrategyType(deployment); err != nil { return err } + if deployment.Spec.Paused { + // A paused Deployment silently freezes the native controller; surface + // it through the degraded channel instead of waiting without signal. 
+ return fmt.Errorf("%w: deployment is paused", partitionstyle.ErrMinReadyDriftDetected) + } if deployment.Spec.MinReadySeconds != InflatedMinReadySeconds { - return fmt.Errorf("%s: minReadySeconds=%d want %d", - EventDegradedDriftDetected, deployment.Spec.MinReadySeconds, InflatedMinReadySeconds) + return fmt.Errorf("%w: minReadySeconds=%d want %d", + partitionstyle.ErrMinReadyDriftDetected, deployment.Spec.MinReadySeconds, InflatedMinReadySeconds) } if deployment.Spec.ProgressDeadlineSeconds == nil || *deployment.Spec.ProgressDeadlineSeconds != InflatedProgressDeadlineSeconds { - return fmt.Errorf("%s: progressDeadlineSeconds=%v want %d", - EventDegradedDriftDetected, deployment.Spec.ProgressDeadlineSeconds, InflatedProgressDeadlineSeconds) + return fmt.Errorf("%w: progressDeadlineSeconds=%v want %d", + partitionstyle.ErrMinReadyDriftDetected, deployment.Spec.ProgressDeadlineSeconds, InflatedProgressDeadlineSeconds) } if deployment.Spec.Strategy.RollingUpdate == nil { - return fmt.Errorf("%s: rollingUpdate is nil", EventDegradedDriftDetected) + return fmt.Errorf("%w: rollingUpdate is nil", partitionstyle.ErrMinReadyDriftDetected) } return nil } func validateDeploymentStrategyType(deployment *apps.Deployment) error { if deployment.Spec.Strategy.Type != apps.RollingUpdateDeploymentStrategyType { - return fmt.Errorf("%s: deployment strategy type %s is not RollingUpdate", - EventDegradedDriftDetected, deployment.Spec.Strategy.Type) + return fmt.Errorf("%w: deployment strategy type %s is not RollingUpdate", + partitionstyle.ErrMinReadyDriftDetected, deployment.Spec.Strategy.Type) } return nil } @@ -344,6 +380,9 @@ func applyOriginalDeploymentStrategy(deployment *apps.Deployment, original *orig deployment.Spec.Strategy.RollingUpdate.MaxSurge = original.maxSurge } -const EventDegradedDriftDetected = "MinReadyDegradedDriftDetected" +// EventDegradedDriftDetected is the warning event reason recorded when +// external drift of the inflated fields is detected. 
It equals the sentinel +// error text so events, metrics and errors.Is classification stay in sync. +var EventDegradedDriftDetected = partitionstyle.ErrMinReadyDriftDetected.Error() var _ partitionstyle.Interface = (*MinReadyControl)(nil) diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go index 717f0a38..a7d27887 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go @@ -23,6 +23,7 @@ import ( apps "k8s.io/api/apps/v1" policyv1 "k8s.io/api/policy/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/utils/pointer" batchcontext "github.com/openkruise/rollouts/pkg/controller/batchrelease/context" @@ -218,6 +219,37 @@ func TestMinReadyUpgradeBatchRejectsStrategyTypeDrift(t *testing.T) { } } +func TestMinReadyUpgradeBatchHealsPausedDrift(t *testing.T) { + // P0-2: a Deployment paused mid-rollout silently freezes the native + // controller. validateInflatedDeploymentStrategy now treats paused as drift, + // so ensureInflatedDeploymentStrategy re-inflates and clears spec.paused, + // actively unfreezing the workload instead of leaving it stuck without signal. + // (Recreate strategy-type drift is reported as degraded instead of healed, + // because Recreate may have already deleted pods destructively.) 
+ _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newInflatedMinReadyDeployment() + addMinReadyOriginalAnnotations(deployment) + deployment.Spec.Paused = true + control := newBuiltMinReadyControl(t, deployment) + ctx := &batchcontext.BatchContext{ + CurrentBatch: 1, + Replicas: 10, + DesiredUpdatedReplicas: 5, + } + + if err := control.UpgradeBatch(ctx); err != nil { + t.Fatalf("UpgradeBatch failed: %v", err) + } + + got := fetchMinReadyDeployment(t, control) + if got.Spec.Paused { + t.Fatalf("deployment still paused, want spec.paused=false after self-heal") + } + if got.Spec.MinReadySeconds != InflatedMinReadySeconds { + t.Fatalf("minReadySeconds = %d, want %d (re-inflated)", got.Spec.MinReadySeconds, InflatedMinReadySeconds) + } +} + func TestMinReadyUpgradeBatchRestoresInflatedStrategyFields(t *testing.T) { _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") deployment := newInflatedMinReadyDeployment() @@ -251,6 +283,33 @@ func TestMinReadyUpgradeBatchRestoresInflatedStrategyFields(t *testing.T) { } } +func TestMinReadyUpgradeBatchConvergesMaxUnavailableOnScaleDown(t *testing.T) { + // P1-2: after a scale-down (HPA or manual) the previously-set integer + // maxUnavailable can exceed the new batch target. This is a legal state, not + // external tampering, so UpgradeBatch must converge it back to the target + // instead of reporting degraded drift. 
+ _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newInflatedMinReadyDeployment() + addMinReadyOriginalAnnotations(deployment) + maxUnavailable := intstr.FromInt(8) + deployment.Spec.Strategy.RollingUpdate.MaxUnavailable = &maxUnavailable + control := newBuiltMinReadyControl(t, deployment) + ctx := &batchcontext.BatchContext{ + CurrentBatch: 1, + Replicas: 10, + DesiredUpdatedReplicas: 5, + } + + if err := control.UpgradeBatch(ctx); err != nil { + t.Fatalf("UpgradeBatch failed: %v", err) + } + + got := fetchMinReadyDeployment(t, control) + if value := minReadyMaxUnavailableValue(t, got, 10); value != 5 { + t.Fatalf("maxUnavailable = %d, want 5 (converged to target)", value) + } +} + func TestMinReadyCalculateBatchContextUsesUpdatedReadyReplicas(t *testing.T) { release := releaseDemo.DeepCopy() release.Status.CanaryStatus.CurrentBatch = 1 @@ -427,3 +486,65 @@ func TestMinReadyCalculateBatchContextReplicasZero(t *testing.T) { t.Fatalf("DesiredUpdatedReplicas = %d, want 0", ctx.DesiredUpdatedReplicas) } } + +func TestEnrollMinReadyDeploymentSnapshotsAndInflates(t *testing.T) { + // P1-6: enrollment runs at admission time so the native controller never + // observes the user's original budget before Initialize lands. + deployment := newMinReadyDeployment() + if err := EnrollMinReadyDeployment(deployment); err != nil { + t.Fatalf("EnrollMinReadyDeployment failed: %v", err) + } + if !hasAnyOriginalAnnotation(deployment.Annotations) { + t.Fatalf("expected original annotations to be written") + } + if deployment.Annotations[AnnotationOriginalMinReadySeconds] != "7" { + t.Fatalf("original min-ready-seconds = %q, want 7", deployment.Annotations[AnnotationOriginalMinReadySeconds]) + } + assertMinReadyInflated(t, deployment) +} + +func TestEnrollMinReadyDeploymentValidatesExistingAnnotations(t *testing.T) { + // When annotations already exist (e.g. 
a re-admission), enrollment validates + // the inflated state instead of rewriting the snapshot. + deployment := newInflatedMinReadyDeployment() + addMinReadyOriginalAnnotations(deployment) + original := deployment.Annotations[AnnotationOriginalMinReadySeconds] + if err := EnrollMinReadyDeployment(deployment); err != nil { + t.Fatalf("EnrollMinReadyDeployment failed: %v", err) + } + if deployment.Annotations[AnnotationOriginalMinReadySeconds] != original { + t.Fatalf("original annotation was rewritten: %q -> %q", original, deployment.Annotations[AnnotationOriginalMinReadySeconds]) + } +} + +func TestEnrollMinReadyDeploymentRejectsRecreate(t *testing.T) { + deployment := newMinReadyDeployment() + deployment.Spec.Strategy.Type = apps.RecreateDeploymentStrategyType + if err := EnrollMinReadyDeployment(deployment); err == nil { + t.Fatalf("EnrollMinReadyDeployment accepted Recreate strategy, want error") + } +} + +func TestMinReadyFinalizeRestoresAfterGateDisabled(t *testing.T) { + // P1-4: even with the feature gate disabled, a Deployment carrying MinReady + // original annotations must finalize cleanly and restore the original fields. 
+ _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=false") + deployment := newInflatedMinReadyDeployment() + addMinReadyOriginalAnnotations(deployment) + control := newBuiltMinReadyControl(t, deployment) + + if err := control.Finalize(releaseDemo.DeepCopy()); err != nil { + t.Fatalf("Finalize failed: %v", err) + } + + got := fetchMinReadyDeployment(t, control) + if got.Spec.MinReadySeconds != 7 { + t.Fatalf("minReadySeconds = %d, want 7 (restored)", got.Spec.MinReadySeconds) + } + if got.Spec.ProgressDeadlineSeconds == nil || *got.Spec.ProgressDeadlineSeconds != 60 { + t.Fatalf("progressDeadlineSeconds = %v, want 60 (restored)", got.Spec.ProgressDeadlineSeconds) + } + if hasAnyOriginalAnnotation(got.Annotations) { + t.Fatalf("original annotations not cleaned up: %v", got.Annotations) + } +} diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_test_helpers_test.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_test_helpers_test.go index 80ef17f1..4d20601c 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_test_helpers_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_test_helpers_test.go @@ -233,3 +233,15 @@ func pointerInt32(v int32) *int32 { func pointerBool(v bool) *bool { return &v } + +func minReadyMaxUnavailableValue(t *testing.T, deployment *apps.Deployment, replicas int32) int { + t.Helper() + if deployment.Spec.Strategy.RollingUpdate == nil || deployment.Spec.Strategy.RollingUpdate.MaxUnavailable == nil { + t.Fatalf("rollingUpdate.maxUnavailable is nil") + } + value, err := intstr.GetScaledValueFromIntOrPercent(deployment.Spec.Strategy.RollingUpdate.MaxUnavailable, int(replicas), true) + if err != nil { + t.Fatalf("scaled maxUnavailable failed: %v", err) + } + return value +} diff --git a/pkg/controller/batchrelease/control/partitionstyle/minready_errors.go 
b/pkg/controller/batchrelease/control/partitionstyle/minready_errors.go new file mode 100644 index 00000000..d27d0709 --- /dev/null +++ b/pkg/controller/batchrelease/control/partitionstyle/minready_errors.go @@ -0,0 +1,37 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package partitionstyle + +import "errors" + +// Sentinel errors used to classify MinReady degraded conditions into stable +// Prometheus metric labels and event reasons. Producers must wrap them with +// %w so that classification relies on errors.Is instead of message text. +var ( + // ErrMinReadyFeatureGateDisabled indicates the MinReadySecondsStrategy + // feature gate is disabled while a MinReady operation was requested. + ErrMinReadyFeatureGateDisabled = errors.New("feature gate is disabled") + + // ErrMinReadyAnnotationInvalid covers missing, empty or malformed + // MinReady original-strategy annotations. + ErrMinReadyAnnotationInvalid = errors.New("original annotation invalid") + + // ErrMinReadyDriftDetected indicates the inflated Deployment fields were + // changed externally (GitOps reconcile, manual kubectl, etc.). Its text + // doubles as the warning event reason. 
+ ErrMinReadyDriftDetected = errors.New("MinReadyDegradedDriftDetected") +) diff --git a/pkg/controller/batchrelease/control/partitionstyle/minready_status.go b/pkg/controller/batchrelease/control/partitionstyle/minready_status.go index e8e17d82..8261a1f9 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/minready_status.go +++ b/pkg/controller/batchrelease/control/partitionstyle/minready_status.go @@ -17,8 +17,8 @@ limitations under the License. package partitionstyle import ( + "errors" "reflect" - "strings" "time" apps "k8s.io/api/apps/v1" @@ -32,13 +32,27 @@ import ( ) func (rc *realBatchControlPlane) isMinReadyRelease() bool { - if rc.release == nil || !utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) { + if rc.release == nil { return false } targetRef := rc.release.Spec.WorkloadRef - return targetRef.APIVersion == apps.SchemeGroupVersion.String() && + isDeploymentPartition := targetRef.APIVersion == apps.SchemeGroupVersion.String() && targetRef.Kind == reflect.TypeOf(apps.Deployment{}).Name() && rc.release.Spec.ReleasePlan.RollingStyle == v1beta1.PartitionRollingStyle + if !isDeploymentPartition { + return false + } + if utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) { + return true + } + // Gate disabled mid-rollout: a Deployment still carrying MinReady original + // annotations is under MinReady control until finalized. Keep recording its + // status so degraded conditions are not silently suppressed. Falls back to + // false before the controller is built (no workload info yet). 
+ if info := rc.GetWorkloadInfo(); info != nil { + return v1beta1.HasMinReadyOriginalAnnotations(info.Annotations) + } + return false } func (rc *realBatchControlPlane) recordMinReadyNormal(condType v1beta1.RolloutConditionType, reason, message string) { @@ -78,11 +92,12 @@ func (rc *realBatchControlPlane) recordMinReadyDegraded(reason string, err error return } message := err.Error() - eventReason := minReadyDegradedEventReason(reason, message) + classified := classifyMinReadyDegradedReason(reason, err) + eventReason := classified.event condition := util.NewRolloutCondition(v1beta1.RolloutConditionMinReadyDegraded, v1.ConditionTrue, eventReason, message) util.SetBatchReleaseCondition(rc.newStatus, *condition) rc.newStatus.Message = message - degradedReason := minReadyDegradedMetricReason(message) + degradedReason := classified.metric brmetrics.ClearMinReadyStuckSeconds(rc.release, brmetrics.StuckReasonBatchReadyTimeout) brmetrics.RecordMinReadyBatch(rc.release, brmetrics.BatchResultDegraded) brmetrics.RecordMinReadyDegraded(rc.release, degradedReason) @@ -110,36 +125,30 @@ type minReadyDegradedReason struct { event string } -func minReadyDegradedMetricReason(message string) string { - return classifyMinReadyDegradedReason("", message).metric -} - -func minReadyDegradedEventReason(fallback, message string) string { - return classifyMinReadyDegradedReason(fallback, message).event -} - -func classifyMinReadyDegradedReason(fallback, message string) minReadyDegradedReason { - eventReason := fallback - metricReason := brmetrics.DegradedReasonControllerError +// classifyMinReadyDegradedReason maps a degraded error onto a stable metric +// label and event reason via errors.Is, so the classification does not depend +// on human-readable error text. Producers wrap the sentinels in minready_errors.go +// with %w; fallback is used as the event reason for unclassified errors. 
+func classifyMinReadyDegradedReason(fallback string, err error) minReadyDegradedReason { switch { - case strings.Contains(message, "feature gate is disabled"): - metricReason = brmetrics.DegradedReasonFeatureGateDisabled - eventReason = "MinReadyFeatureGateDisabled" - case strings.Contains(message, "annotation ") && strings.Contains(message, "missing"): - metricReason = brmetrics.DegradedReasonMissingAnnotations - eventReason = "MinReadyDegradedMissingAnnotations" - case strings.Contains(message, "annotation ") && strings.Contains(message, "empty"): - metricReason = brmetrics.DegradedReasonMissingAnnotations - eventReason = "MinReadyDegradedMissingAnnotations" - case strings.Contains(message, "annotation ") && strings.Contains(message, "malformed"): - metricReason = brmetrics.DegradedReasonMissingAnnotations - eventReason = "MinReadyDegradedMissingAnnotations" - case strings.Contains(message, "MinReadyDegradedDriftDetected"): - metricReason = brmetrics.DegradedReasonGitOpsDrift - eventReason = "MinReadyDegradedDriftDetected" + case errors.Is(err, ErrMinReadyFeatureGateDisabled): + return minReadyDegradedReason{ + metric: brmetrics.DegradedReasonFeatureGateDisabled, + event: "MinReadyFeatureGateDisabled", + } + case errors.Is(err, ErrMinReadyAnnotationInvalid): + return minReadyDegradedReason{ + metric: brmetrics.DegradedReasonMissingAnnotations, + event: "MinReadyDegradedMissingAnnotations", + } + case errors.Is(err, ErrMinReadyDriftDetected): + return minReadyDegradedReason{ + metric: brmetrics.DegradedReasonGitOpsDrift, + event: "MinReadyDegradedDriftDetected", + } } return minReadyDegradedReason{ - metric: metricReason, - event: eventReason, + metric: brmetrics.DegradedReasonControllerError, + event: fallback, } } diff --git a/pkg/controller/batchrelease/control/partitionstyle/minready_status_test.go b/pkg/controller/batchrelease/control/partitionstyle/minready_status_test.go index 0806e72a..43195119 100644 --- 
a/pkg/controller/batchrelease/control/partitionstyle/minready_status_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/minready_status_test.go @@ -17,6 +17,8 @@ limitations under the License. package partitionstyle import ( + "errors" + "fmt" "testing" "time" @@ -142,10 +144,48 @@ func TestObserveMinReadyBatchWaitSetsStuckGauge(t *testing.T) { } } -func TestMinReadyDegradedMetricReasonDetectsDrift(t *testing.T) { - message := "MinReadyControl.UpgradeBatch[1]: MinReadyDegradedDriftDetected: maxUnavailable=3 exceeds target=2" - if got := minReadyDegradedMetricReason(message); got != brmetrics.DegradedReasonGitOpsDrift { - t.Fatalf("metric reason = %q, want %q", got, brmetrics.DegradedReasonGitOpsDrift) +func TestClassifyMinReadyDegradedReason(t *testing.T) { + cases := []struct { + name string + err error + metric string + event string + }{ + { + name: "drift", + err: fmt.Errorf("MinReadyControl.UpgradeBatch[1]: %w: maxUnavailable=3 exceeds target=2", ErrMinReadyDriftDetected), + metric: brmetrics.DegradedReasonGitOpsDrift, + event: "MinReadyDegradedDriftDetected", + }, + { + name: "feature gate disabled", + err: fmt.Errorf("MinReadyControl.Initialize: %w", ErrMinReadyFeatureGateDisabled), + metric: brmetrics.DegradedReasonFeatureGateDisabled, + event: "MinReadyFeatureGateDisabled", + }, + { + name: "annotation invalid", + err: fmt.Errorf("annotation foo missing: %w", ErrMinReadyAnnotationInvalid), + metric: brmetrics.DegradedReasonMissingAnnotations, + event: "MinReadyDegradedMissingAnnotations", + }, + { + name: "unclassified falls back", + err: errors.New("some controller error"), + metric: brmetrics.DegradedReasonControllerError, + event: "MinReadyBatchingFailed", + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := classifyMinReadyDegradedReason("MinReadyBatchingFailed", tc.err) + if got.metric != tc.metric { + t.Fatalf("metric reason = %q, want %q", got.metric, tc.metric) + } + if got.event != tc.event { + 
t.Fatalf("event reason = %q, want %q", got.event, tc.event) + } + }) } } diff --git a/pkg/util/parse_utils.go b/pkg/util/parse_utils.go index 7de786df..b41f8dce 100644 --- a/pkg/util/parse_utils.go +++ b/pkg/util/parse_utils.go @@ -29,6 +29,7 @@ import ( "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/klog/v2" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -395,11 +396,17 @@ func parseStatusIntFromUnstructured(object *unstructured.Unstructured, field str // ParseStatusStringFromUnstructured can parse some fields with string type from unstructured workload object status func parseStatusStringFromUnstructured(object *unstructured.Unstructured, field string) string { - value, found, err := unstructured.NestedFieldNoCopy(object.Object, "status", field) - if err == nil && found { - return value.(string) + // NestedString returns an error (not a panic) when the field exists but is + // not a string, so a malformed status cannot crash the controller. 
+ value, found, err := unstructured.NestedString(object.Object, "status", field) + if err != nil { + klog.Warningf("failed to parse status.%s as string from %v: %v", field, object.GetName(), err) + return "" + } + if !found { + return "" } - return "" + return value } // parseSelectorFromUnstructured can parse labelSelector as selector from unstructured workload object @@ -408,9 +415,14 @@ func parseSelectorFromUnstructured(object *unstructured.Unstructured) (labels.Se if err != nil || !found { return nil, err } - byteInfo, _ := json.Marshal(m) + byteInfo, err := json.Marshal(m) + if err != nil { + return nil, fmt.Errorf("marshal spec.selector of %v failed: %w", object.GetName(), err) + } labelSelector := &metav1.LabelSelector{} - _ = json.Unmarshal(byteInfo, labelSelector) + if err := json.Unmarshal(byteInfo, labelSelector); err != nil { + return nil, fmt.Errorf("unmarshal spec.selector of %v failed: %w", object.GetName(), err) + } return metav1.LabelSelectorAsSelector(labelSelector) } @@ -421,8 +433,15 @@ func parseTemplateFromUnstructured(object *unstructured.Unstructured) *corev1.Po return nil } template := &corev1.PodTemplateSpec{} - templateByte, _ := json.Marshal(t) - _ = json.Unmarshal(templateByte, template) + templateByte, err := json.Marshal(t) + if err != nil { + klog.Warningf("failed to marshal spec.template of %v: %v", object.GetName(), err) + return nil + } + if err := json.Unmarshal(templateByte, template); err != nil { + klog.Warningf("failed to unmarshal spec.template of %v: %v", object.GetName(), err) + return nil + } return template } @@ -432,17 +451,30 @@ func parseMetadataFromUnstructured(object *unstructured.Unstructured) *metav1.Ob if err != nil || !found { return nil } - data, _ := json.Marshal(m) + data, err := json.Marshal(m) + if err != nil { + klog.Warningf("failed to marshal metadata of %v: %v", object.GetName(), err) + return nil + } meta := &metav1.ObjectMeta{} - _ = json.Unmarshal(data, meta) + if err := json.Unmarshal(data, meta); 
err != nil { + klog.Warningf("failed to unmarshal metadata of %v: %v", object.GetName(), err) + return nil + } return meta } // unmarshalIntStr return *intstr.IntOrString func unmarshalIntStr(m interface{}) *intstr.IntOrString { field := &intstr.IntOrString{} - data, _ := json.Marshal(m) - _ = json.Unmarshal(data, field) + data, err := json.Marshal(m) + if err != nil { + klog.Warningf("failed to marshal intOrString value %v: %v", m, err) + return field + } + if err := json.Unmarshal(data, field); err != nil { + klog.Warningf("failed to unmarshal intOrString value %v: %v", m, err) + } return field } diff --git a/pkg/util/parse_utils_test.go b/pkg/util/parse_utils_test.go index 4d522118..eb22e12d 100644 --- a/pkg/util/parse_utils_test.go +++ b/pkg/util/parse_utils_test.go @@ -632,3 +632,54 @@ func TestNativeDaemonSetUnstructuredParse(t *testing.T) { Expect(workloadInfo.Status.ObservedGeneration).Should(BeNumerically("==", ds.Status.ObservedGeneration)) }) } + +// TestParseStatusStringFromUnstructuredMalformed verifies that a non-string +// status field no longer panics (it used to do an unchecked type assertion) and +// instead degrades to an empty string. Regression test for the parse_utils +// type-assertion panic fixed alongside the MinReadySeconds review. 
+func TestParseStatusStringFromUnstructuredMalformed(t *testing.T) { + cases := map[string]struct { + status interface{} + want string + }{ + "string value": {status: "rev-abc", want: "rev-abc"}, + "int value": {status: int64(42), want: ""}, + "map value": {status: map[string]interface{}{"nested": "x"}, want: ""}, + "bool value": {status: true, want: ""}, + "missing field": {status: nil, want: ""}, + } + for name, cs := range cases { + t.Run(name, func(t *testing.T) { + statusMap := map[string]interface{}{} + if cs.status != nil { + statusMap["updateRevision"] = cs.status + } + obj := &unstructured.Unstructured{Object: map[string]interface{}{ + "metadata": map[string]interface{}{"name": "demo"}, + "status": statusMap, + }} + got := parseStatusStringFromUnstructured(obj, "updateRevision") + if got != cs.want { + t.Fatalf("parseStatusStringFromUnstructured = %q, want %q", got, cs.want) + } + }) + } +} + +// TestParseSelectorFromUnstructuredMalformed verifies a malformed selector +// surfaces an error instead of being silently swallowed into an empty selector. +func TestParseSelectorFromUnstructuredMalformed(t *testing.T) { + // spec.selector whose matchLabels is a string (not a map) fails to unmarshal + // into metav1.LabelSelector. 
+ obj := &unstructured.Unstructured{Object: map[string]interface{}{ + "metadata": map[string]interface{}{"name": "demo"}, + "spec": map[string]interface{}{ + "selector": map[string]interface{}{ + "matchLabels": "not-a-map", + }, + }, + }} + if _, err := parseSelectorFromUnstructured(obj); err == nil { + t.Fatalf("parseSelectorFromUnstructured accepted malformed selector, want error") + } +} diff --git a/pkg/webhook/util/writer/fs.go b/pkg/webhook/util/writer/fs.go index 60962d14..831f1ac2 100644 --- a/pkg/webhook/util/writer/fs.go +++ b/pkg/webhook/util/writer/fs.go @@ -126,8 +126,9 @@ func prepareToWrite(dir string) error { switch { case os.IsNotExist(err): klog.Info("cert directory doesn't exist, creating", "directory", dir) - // TODO: figure out if we can reduce the permission. (Now it's 0777) - err = os.MkdirAll(dir, 0777) + // The directory holds the CA/server private keys, so restrict it to the + // owner (0700) instead of world-accessible 0777. + err = os.MkdirAll(dir, 0700) if err != nil { return fmt.Errorf("can't create dir: %v, err: %s", dir, err.Error()) } @@ -201,31 +202,32 @@ func ensureExist(dir string) error { } func certToProjectionMap(cert *generator.Artifacts) map[string]atomic.FileProjection { - // TODO: figure out if we can reduce the permission. (Now it's 0666) + // Private keys are restricted to the owner (0600); public certificates may + // stay world-readable (0644). 
return map[string]atomic.FileProjection{ CAKeyName: { Data: cert.CAKey, - Mode: 0666, + Mode: 0600, }, CACertName: { Data: cert.CACert, - Mode: 0666, + Mode: 0644, }, ServerCertName: { Data: cert.Cert, - Mode: 0666, + Mode: 0644, }, ServerCertName2: { Data: cert.Cert, - Mode: 0666, + Mode: 0644, }, ServerKeyName: { Data: cert.Key, - Mode: 0666, + Mode: 0600, }, ServerKeyName2: { Data: cert.Key, - Mode: 0666, + Mode: 0600, }, } } diff --git a/pkg/webhook/util/writer/fs_test.go b/pkg/webhook/util/writer/fs_test.go new file mode 100644 index 00000000..c48b50a8 --- /dev/null +++ b/pkg/webhook/util/writer/fs_test.go @@ -0,0 +1,77 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package writer + +import ( + "os" + "path" + "path/filepath" + "testing" + + "github.com/openkruise/rollouts/pkg/webhook/util/generator" +) + +// TestWriteCertsToDirPermissions asserts the cert directory and the files it +// holds get restrictive permissions: the directory is owner-only (0700), the +// CA/server private keys are 0600, and the public certificates may stay 0644. +// This locks down the fix for the over-permissive 0777 dir / 0666 key writes. 
+func TestWriteCertsToDirPermissions(t *testing.T) { + base := t.TempDir() + dir := filepath.Join(base, "certs") + + certs := &generator.Artifacts{ + CAKey: []byte("ca-key"), + CACert: []byte("ca-cert"), + Key: []byte("server-key"), + Cert: []byte("server-cert"), + } + + if err := WriteCertsToDir(dir, certs); err != nil { + t.Fatalf("WriteCertsToDir failed: %v", err) + } + + info, err := os.Stat(dir) + if err != nil { + t.Fatalf("stat dir failed: %v", err) + } + if perm := info.Mode().Perm(); perm != 0700 { + t.Fatalf("cert dir perm = %#o, want 0700", perm) + } + + privateKeys := []string{CAKeyName, ServerKeyName, ServerKeyName2} + for _, name := range privateKeys { + assertFilePerm(t, dir, name, 0600) + } + + publicCerts := []string{CACertName, ServerCertName, ServerCertName2} + for _, name := range publicCerts { + assertFilePerm(t, dir, name, 0644) + } +} + +func assertFilePerm(t *testing.T, dir, name string, want os.FileMode) { + t.Helper() + // The atomic writer exposes the payload through a symlink to a timestamped + // directory; Stat (not Lstat) follows it to the real file we care about. 
+ info, err := os.Stat(path.Join(dir, name)) + if err != nil { + t.Fatalf("stat %s failed: %v", name, err) + } + if perm := info.Mode().Perm(); perm != want { + t.Fatalf("%s perm = %#o, want %#o", name, perm, want) + } +} diff --git a/pkg/webhook/workload/mutating/workload_update_handler.go b/pkg/webhook/workload/mutating/workload_update_handler.go index 128f796e..7564cef8 100644 --- a/pkg/webhook/workload/mutating/workload_update_handler.go +++ b/pkg/webhook/workload/mutating/workload_update_handler.go @@ -330,7 +330,19 @@ func (h *WorkloadHandler) handleDeployment(newObj, oldObj *apps.Deployment) (boo newObj.Labels[appsv1alpha1.DeploymentStableRevisionLabel] = stableRS.Labels[apps.DefaultDeploymentUniqueLabelKey] } - if !shouldSkipRecreateMutationForMinReady(rollout) { + if shouldSkipRecreateMutationForMinReady(rollout) { + // MinReady keeps the native controller running, so it must NOT be paused. + // Inflate the strategy synchronously at admission time: this snapshots the + // original fields into annotations and sets minReadySeconds/maxUnavailable + // so the native controller never observes the user's original budget in the + // window between admission and MinReadyControl.Initialize. Initialize stays + // the fallback and validates (instead of rewriting) annotations that exist. + if err := partitiondeployment.EnrollMinReadyDeployment(newObj); err != nil { + // Do not block admission; the controller's Initialize will surface a + // degraded condition for an unsupported strategy instead. + klog.Warningf("Skip MinReady enrollment for Deployment(%s/%s): %v", newObj.Namespace, newObj.Name, err) + } + } else { // Partition/Recreate style disables the native Deployment controller. 
newObj.Spec.Paused = true } @@ -459,6 +471,16 @@ func isEffectiveDeploymentRevisionChange(oldObj, newObj *apps.Deployment) bool { return true } +// shouldSkipRecreateMutationForMinReady reports whether the Deployment should be +// driven by the MinReadySeconds strategy (keep RollingUpdate, do not pause) +// instead of the legacy Recreate-style mutation. +// +// It only checks Canary because a Rollout cannot declare BlueGreen and Canary at +// the same time: the validating webhook rejects that combination +// (pkg/webhook/rollout/validating/rollout_create_update_handler.go, +// "Canary and BlueGreen cannot both be set"). With BlueGreen==nil guaranteed, +// Canary!=nil && !EnableExtraWorkloadForCanary is equivalent to the executor's +// GetRollingStyle()==Partition routing, so both sides agree on MinReady. func shouldSkipRecreateMutationForMinReady(rollout *appsv1beta1.Rollout) bool { return rollout.Spec.Strategy.Canary != nil && !rollout.Spec.Strategy.Canary.EnableExtraWorkloadForCanary && @@ -470,6 +492,22 @@ func enforceMinReadyInflation(deployment *apps.Deployment) bool { return false } modified := false + // The MinReady strategy relies on the native RollingUpdate controller staying + // active and driven by inflated fields. Re-assert the core invariants here so a + // GitOps/manual write of Recreate or paused=true is reverted at admission time + // rather than only surfacing as a controller-side degraded condition later. 
+ if deployment.Spec.Strategy.Type != apps.RollingUpdateDeploymentStrategyType { + deployment.Spec.Strategy.Type = apps.RollingUpdateDeploymentStrategyType + modified = true + } + if deployment.Spec.Paused { + deployment.Spec.Paused = false + modified = true + } + if deployment.Spec.Strategy.RollingUpdate == nil { + deployment.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{} + modified = true + } if deployment.Spec.MinReadySeconds != partitiondeployment.InflatedMinReadySeconds { deployment.Spec.MinReadySeconds = partitiondeployment.InflatedMinReadySeconds modified = true diff --git a/pkg/webhook/workload/mutating/workload_update_handler_test.go b/pkg/webhook/workload/mutating/workload_update_handler_test.go index 43b54afd..f38f4a29 100644 --- a/pkg/webhook/workload/mutating/workload_update_handler_test.go +++ b/pkg/webhook/workload/mutating/workload_update_handler_test.go @@ -44,6 +44,7 @@ import ( rolloutapi "github.com/openkruise/rollouts/api" appsv1alpha1 "github.com/openkruise/rollouts/api/v1alpha1" appsv1beta1 "github.com/openkruise/rollouts/api/v1beta1" + partitiondeployment "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/deployment" "github.com/openkruise/rollouts/pkg/feature" "github.com/openkruise/rollouts/pkg/util" utilfeature "github.com/openkruise/rollouts/pkg/util/feature" @@ -422,18 +423,33 @@ func TestHandlerDeployment(t *testing.T) { }, }, { - name: "deployment image v1->v2, matched minready rollout keeps deployment unpaused", + name: "deployment image v1->v2, matched minready rollout inflates strategy at admission and stays unpaused", getObjs: func() (*apps.Deployment, *apps.Deployment) { oldObj := deploymentDemo.DeepCopy() + oldObj.Spec.Strategy.Type = apps.RollingUpdateDeploymentStrategyType newObj := deploymentDemo.DeepCopy() + newObj.Spec.Strategy.Type = apps.RollingUpdateDeploymentStrategyType newObj.Spec.Template.Spec.Containers[0].Image = "echoserver:v2" return oldObj, newObj }, expectObj: 
func() *apps.Deployment { obj := deploymentDemo.DeepCopy() + obj.Spec.Strategy.Type = apps.RollingUpdateDeploymentStrategyType obj.Spec.Template.Spec.Containers[0].Image = "echoserver:v2" obj.Annotations[util.InRolloutProgressingAnnotation] = `{"rolloutName":"rollout-demo"}` + // P1-6: enrollment snapshots original strategy fields and inflates + // minReadySeconds/progressDeadline/maxUnavailable synchronously so the + // native controller never observes the original budget before Initialize. + obj.Annotations[partitiondeployment.AnnotationOriginalMinReadySeconds] = "0" + obj.Annotations[partitiondeployment.AnnotationOriginalProgressDeadlineSeconds] = partitiondeployment.AnnotationValueKubernetesDefault + obj.Annotations[partitiondeployment.AnnotationOriginalMaxUnavailable] = partitiondeployment.AnnotationValueKubernetesDefault + obj.Annotations[partitiondeployment.AnnotationOriginalMaxSurge] = partitiondeployment.AnnotationValueKubernetesDefault obj.Spec.Paused = false + obj.Spec.MinReadySeconds = partitiondeployment.InflatedMinReadySeconds + pds := partitiondeployment.InflatedProgressDeadlineSeconds + obj.Spec.ProgressDeadlineSeconds = &pds + maxUnavailable := intstr.FromInt(0) + obj.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{MaxUnavailable: &maxUnavailable} return obj }, getRs: func() []*apps.ReplicaSet { @@ -885,6 +901,103 @@ func TestShouldSkipRecreateMutationForMinReady(t *testing.T) { } } +// inflatedMinReadyDeployment returns a Deployment in a healthy inflated MinReady +// state: RollingUpdate, unpaused, with original-strategy annotations present. 
+func inflatedMinReadyDeployment() *apps.Deployment { + pds := partitiondeployment.InflatedProgressDeadlineSeconds + maxUnavailable := intstr.FromInt(0) + return &apps.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + partitiondeployment.AnnotationOriginalMinReadySeconds: "0", + partitiondeployment.AnnotationOriginalProgressDeadlineSeconds: partitiondeployment.AnnotationValueKubernetesDefault, + partitiondeployment.AnnotationOriginalMaxUnavailable: partitiondeployment.AnnotationValueKubernetesDefault, + partitiondeployment.AnnotationOriginalMaxSurge: partitiondeployment.AnnotationValueKubernetesDefault, + }, + }, + Spec: apps.DeploymentSpec{ + Paused: false, + MinReadySeconds: partitiondeployment.InflatedMinReadySeconds, + ProgressDeadlineSeconds: &pds, + Strategy: apps.DeploymentStrategy{ + Type: apps.RollingUpdateDeploymentStrategyType, + RollingUpdate: &apps.RollingUpdateDeployment{MaxUnavailable: &maxUnavailable}, + }, + }, + } +} + +// TestEnforceMinReadyInflation covers P0-2: while a MinReady rollout is +// progressing, the webhook must re-assert the core invariants (RollingUpdate, +// unpaused, non-nil rollingUpdate, inflated fields) so a GitOps/manual drift is +// reverted at admission time rather than only surfacing later in the controller. 
+func TestEnforceMinReadyInflation(t *testing.T) { + t.Run("no MinReady annotations leaves object untouched", func(t *testing.T) { + d := &apps.Deployment{Spec: apps.DeploymentSpec{Strategy: apps.DeploymentStrategy{Type: apps.RecreateDeploymentStrategyType}}} + if enforceMinReadyInflation(d) { + t.Fatalf("expected no modification without MinReady annotations") + } + if d.Spec.Strategy.Type != apps.RecreateDeploymentStrategyType { + t.Fatalf("strategy type changed unexpectedly: %s", d.Spec.Strategy.Type) + } + }) + + t.Run("healthy inflated state is not modified", func(t *testing.T) { + d := inflatedMinReadyDeployment() + if enforceMinReadyInflation(d) { + t.Fatalf("expected no modification for an already-inflated healthy deployment") + } + }) + + t.Run("strategy type drift to Recreate is rewritten", func(t *testing.T) { + d := inflatedMinReadyDeployment() + d.Spec.Strategy.Type = apps.RecreateDeploymentStrategyType + if !enforceMinReadyInflation(d) { + t.Fatalf("expected modification for strategy type drift") + } + if d.Spec.Strategy.Type != apps.RollingUpdateDeploymentStrategyType { + t.Fatalf("strategy type not restored to RollingUpdate: %s", d.Spec.Strategy.Type) + } + }) + + t.Run("paused drift is reverted", func(t *testing.T) { + d := inflatedMinReadyDeployment() + d.Spec.Paused = true + if !enforceMinReadyInflation(d) { + t.Fatalf("expected modification for paused drift") + } + if d.Spec.Paused { + t.Fatalf("paused not reverted to false") + } + }) + + t.Run("nil rollingUpdate is restored", func(t *testing.T) { + d := inflatedMinReadyDeployment() + d.Spec.Strategy.RollingUpdate = nil + if !enforceMinReadyInflation(d) { + t.Fatalf("expected modification for nil rollingUpdate") + } + if d.Spec.Strategy.RollingUpdate == nil { + t.Fatalf("rollingUpdate not restored") + } + }) + + t.Run("deflated fields are re-inflated", func(t *testing.T) { + d := inflatedMinReadyDeployment() + d.Spec.MinReadySeconds = 5 + d.Spec.ProgressDeadlineSeconds = pointer.Int32(600) + if 
!enforceMinReadyInflation(d) { + t.Fatalf("expected modification for deflated fields") + } + if d.Spec.MinReadySeconds != partitiondeployment.InflatedMinReadySeconds { + t.Fatalf("minReadySeconds not re-inflated: %d", d.Spec.MinReadySeconds) + } + if d.Spec.ProgressDeadlineSeconds == nil || *d.Spec.ProgressDeadlineSeconds != partitiondeployment.InflatedProgressDeadlineSeconds { + t.Fatalf("progressDeadlineSeconds not re-inflated: %v", d.Spec.ProgressDeadlineSeconds) + } + }) +} + func TestHandlerCloneSet(t *testing.T) { cases := []struct { name string From db836f0c9c04a6296bd9d0af35bf9f69edb87cc3 Mon Sep 17 00:00:00 2001 From: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> Date: Sun, 14 Jun 2026 17:01:09 +0800 Subject: [PATCH 06/22] fix: address MinReady review follow-ups Signed-off-by: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> --- api/v1beta1/deployment_types.go | 2 - ...t-minready-seconds-progressive-delivery.md | 81 +++++------ .../batchrelease/batchrelease_controller.go | 4 +- .../batchrelease_controller_test.go | 6 +- .../batchrelease/batchrelease_executor.go | 24 ++-- .../partitionstyle/cloneset/control.go | 18 +-- .../partitionstyle/cloneset/control_test.go | 8 +- .../control/partitionstyle/control_plane.go | 77 +++++++---- .../partitionstyle/daemonset/control.go | 16 +-- .../partitionstyle/daemonset/control_test.go | 6 +- .../partitionstyle/deployment/control.go | 16 +-- .../partitionstyle/deployment/control_test.go | 8 +- .../deployment/minready_batch_context.go | 3 +- .../deployment/minready_constants.go | 38 +++-- .../deployment/minready_control.go | 130 +++++++----------- .../deployment/minready_control_test.go | 44 +++--- .../deployment/minready_finalize_test.go | 37 ++--- .../deployment/minready_test_helpers_test.go | 4 - .../control/partitionstyle/interface.go | 8 +- .../control/partitionstyle/minready_status.go | 45 +++--- .../partitionstyle/nativedaemonset/control.go | 20 +-- .../nativedaemonset/control_test.go | 30 ++-- 
.../partitionstyle/statefulset/control.go | 16 +-- .../statefulset/control_test.go | 10 +- .../mutating/workload_update_handler.go | 29 ++-- .../mutating/workload_update_handler_test.go | 27 ++-- test/e2e/deployment_minready_test.go | 4 +- test/integration/concurrency_test.go | 20 +-- test/integration/deployment_minready_test.go | 1 - test/integration/minready_helpers_test.go | 11 +- 30 files changed, 360 insertions(+), 383 deletions(-) diff --git a/api/v1beta1/deployment_types.go b/api/v1beta1/deployment_types.go index 49884f07..2b3beb9b 100644 --- a/api/v1beta1/deployment_types.go +++ b/api/v1beta1/deployment_types.go @@ -56,7 +56,6 @@ const ( MinReadyOriginalMinReadySecondsAnnotation = "rollouts.kruise.io/original-min-ready-seconds" MinReadyOriginalProgressDeadlineSecondsAnnotation = "rollouts.kruise.io/original-progress-deadline-seconds" MinReadyOriginalMaxUnavailableAnnotation = "rollouts.kruise.io/original-max-unavailable" - MinReadyOriginalMaxSurgeAnnotation = "rollouts.kruise.io/original-max-surge" ) // MinReadyOriginalAnnotations lists all annotations that snapshot the original @@ -65,7 +64,6 @@ var MinReadyOriginalAnnotations = []string{ MinReadyOriginalMinReadySecondsAnnotation, MinReadyOriginalProgressDeadlineSecondsAnnotation, MinReadyOriginalMaxUnavailableAnnotation, - MinReadyOriginalMaxSurgeAnnotation, } // HasMinReadyOriginalAnnotations returns true if the annotations carry any diff --git a/docs/proposals/20260315-deployment-minready-seconds-progressive-delivery.md b/docs/proposals/20260315-deployment-minready-seconds-progressive-delivery.md index aa126df5..c94cef7b 100644 --- a/docs/proposals/20260315-deployment-minready-seconds-progressive-delivery.md +++ b/docs/proposals/20260315-deployment-minready-seconds-progressive-delivery.md @@ -31,7 +31,7 @@ status: implementable - [API Compatibility](#api-compatibility) - [Annotation Schema](#annotation-schema) - [Field Inflation Values](#field-inflation-values) - - [maxSurge 
Preservation](#maxsurge-preservation) + - [maxSurge Handling](#maxsurge-handling) - [Controller Implementation](#controller-implementation) - [Initialization Process](#initialization-process) - [Batch Upgrade Process](#batch-upgrade-process) @@ -133,7 +133,7 @@ graph TB WH["Workload Update Webhook
feature-gated Recreate skip
preserves RollingUpdate"] EX["BatchRelease Executor
feature-gated controller selection"] MRC["MinReadyControl
(embeds *realController)
Initialize / UpgradeBatch /
CalculateBatchContext / Finalize"] - MS["maxSurge Handling
preserve original value
validation fallback only"] + MS["maxSurge Handling
left to native Deployment"] end subgraph "Kubernetes Native Control Plane" @@ -154,7 +154,7 @@ graph TB EX -->|"6. Route to MinReadyControl"| MRC MRC -->|"7. Initialize:
save original fields
inflate minReadySeconds
set maxUnavailable=0"| API - MRC -.->|"preserve maxSurge
or validation fallback"| MS + MRC -.->|"does not store or mutate maxSurge"| MS API -->|"persists fields + annotations"| DEP MRC -->|"8. UpgradeBatch:
increase maxUnavailable
by batch size"| API @@ -215,30 +215,29 @@ No `CanaryStrategy.DeploymentStrategy`, `ReleasePlan.DeploymentStrategy`, conver #### Annotation Schema -During rollout, the original values of four Deployment fields are persisted in annotations on the Deployment object itself. This makes the rollout state recoverable across controller restarts without any in-memory state. +During rollout, the original values of three Deployment fields are persisted in annotations on the Deployment object itself. This makes the rollout state recoverable across controller restarts without any in-memory state. ``` rollouts.kruise.io/original-min-ready-seconds: "" rollouts.kruise.io/original-progress-deadline-seconds: "" rollouts.kruise.io/original-max-unavailable: "" -rollouts.kruise.io/original-max-surge: "" ``` **Invariants**: -- All four annotations are written and deleted in a single `Patch` operation (relying on the Kubernetes API server's resource-level PATCH atomicity). -- All four present = rollout in progress; all four absent = idle state. -- If the user's original field is `nil` (relying on Kubernetes defaults), the sentinel value `__k8s_default__` is written. This preserves the distinction between "user explicitly set this value" and "user relied on the default", which is important during Finalize. +- All three annotations are written and deleted in a single `Patch` operation (relying on the Kubernetes API server's resource-level PATCH atomicity). +- All three present = rollout in progress; all three absent = idle state. +- If the user's original pointer field is `nil` in tests or fake-client paths, the annotation stores the Kubernetes API default value itself (`600` for `progressDeadlineSeconds`, `25%` for `maxUnavailable`) instead of a sentinel string. 
**Serialization rules**: | Source type | Example value | Annotation string | |---|---|---| | `int32` (pointer non-nil) | `int32(10)` | `"10"` | -| `int32` (pointer nil) | — | `"__k8s_default__"` | +| `int32` (pointer nil) | — | `"600"` | | `IntOrString` Type=Int | `{Type: Int, IntVal: 5}` | `"5"` | | `IntOrString` Type=String | `{Type: String, StrVal: "25%"}` | `"25%"` | -| `*IntOrString` pointer nil | — | `"__k8s_default__"` | +| `*IntOrString` pointer nil | — | `"25%"` | #### Field Inflation Values @@ -254,19 +253,16 @@ During `Initialize`, the core MinReadySeconds path inflates three Deployment fie `maxSurge` is not a new user-facing policy or a MinReadySeconds rollout knob. It is an existing Kubernetes RollingUpdate field, and the MinReadySeconds path preserves the user's original value by default. -#### maxSurge Preservation +#### maxSurge Handling -Native Deployment RollingUpdate supports surge capacity, and this proposal should not require `maxSurge=1` as a semantic constraint. The implementation follows one internal rule: +Native Deployment RollingUpdate supports surge capacity, and this proposal does not treat `maxSurge` as a MinReadySeconds rollout knob. The implementation follows one internal rule: -1. Store the original `maxSurge` value in `rollouts.kruise.io/original-max-surge`. -2. Preserve the live `maxSurge` value during rollout. -3. If preserving it would make the live RollingUpdate strategy invalid because `maxUnavailable=0` and effective `maxSurge=0`, temporarily use `maxSurge=1` as a validation fallback. -4. Restore the original `maxSurge` value during `Finalize`. +1. Do not store the original `maxSurge` value in a MinReady annotation. +2. Do not mutate `maxSurge` during `Initialize`, `UpgradeBatch`, or `Finalize`. +3. Let the native Deployment strategy retain whatever `maxSurge` value the user or Kubernetes defaulting already provided. 
The batch-ready calculation is independent from `maxSurge`: count updated pods only after they are `Ready` and have remained ready for the user's original `minReadySeconds`. Therefore, preserving a larger `maxSurge` can increase temporary pod count, but it cannot mark a batch successful early. -This fallback is not a user-visible policy choice. It exists only to satisfy Kubernetes RollingUpdate validation rules while keeping the default behavior as "preserve the user's original `maxSurge`". - #### Controller Implementation A new controller, `MinReadyControl`, is implemented in `pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go`. It implements the existing `partitionstyle.Interface` by embedding the existing `realController`: @@ -301,10 +297,10 @@ func NewMinReadyController(cli client.Client, key types.NamespacedName, gvk sche - PDB presence is not an eligibility failure. PDBs protect Eviction API flows, not Deployment rolling updates, so they are not used as the batch-safety mechanism. 2. **Annotation persistence** (`writeOriginalAnnotations`): - - If any of the four annotations is already present, validate that all four exist (idempotency check) and that the on-disk fields are already inflated. If consistent, no-op. - - Otherwise, serialize the current values of `minReadySeconds`, `progressDeadlineSeconds`, `maxUnavailable`, `maxSurge` per the serialization rules above and write all four annotations. + - If any of the three annotations is already present, validate that all three exist (idempotency check) and that the on-disk fields are already inflated. If consistent, no-op. + - Otherwise, serialize the current values of `minReadySeconds`, `progressDeadlineSeconds`, and `maxUnavailable` per the serialization rules above and write all three annotations. -3. **Field inflation** (`inflateDeploymentStrategy`): Set `minReadySeconds`, `progressDeadlineSeconds`, and `maxUnavailable` to their MinReadySeconds values. 
Preserve `maxSurge` unless a `maxUnavailable=0 && maxSurge=0` combination would violate Kubernetes RollingUpdate validation; in that case, temporarily use `maxSurge=1`. +3. **Field inflation** (`inflateDeploymentStrategy`): Set `minReadySeconds`, `progressDeadlineSeconds`, and `maxUnavailable` to their MinReadySeconds values. Leave `maxSurge` unchanged. 4. **Atomic commit**: Issue a single `Patch` using `client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{})`. The annotations and field changes are committed together; the Kubernetes API server's resource-level PATCH atomicity guarantees no partial state is observable. @@ -312,10 +308,10 @@ func NewMinReadyController(cli client.Client, key types.NamespacedName, gvk sche `UpgradeBatch(ctx)` is invoked per batch by the BatchRelease executor. It performs: -1. **Inflation invariant** (`ensureInflatedDeploymentStrategy`): Verify and, if necessary, patch the Deployment so `minReadySeconds == MaxReadySeconds` and `progressDeadlineSeconds == MaxProgressSeconds` before each batch operation. `maxSurge` remains preserved except for the validation fallback described above. This makes the inflated fields a rollout-long invariant rather than a one-time initialization side effect. +1. **Inflation invariant** (`ensureInflatedDeploymentStrategy`): Verify and, if necessary, patch the Deployment so `minReadySeconds == MaxReadySeconds` and `progressDeadlineSeconds == MaxProgressSeconds` before each batch operation. `maxSurge` is not part of the MinReady invariant. This makes the inflated fields a rollout-long invariant rather than a one-time initialization side effect. 2. **Target computation**: Read the current `maxUnavailable` and compare against `ctx.DesiredUpdatedReplicas`. - - If `current > target`: external write has increased `maxUnavailable` beyond the batch target → `MinReadyDegraded`. + - If `current > target`: external write or scale-down has left `maxUnavailable` above the batch target. 
This is legal and self-heals by reducing it to the target. - If `current >= target`: already at target, no-op. 3. **Patch `maxUnavailable = target`**: A single-field Patch using the same optimistic-lock mechanism. The native RollingUpdate controller observes the change and creates new pods accordingly. Because `minReadySeconds` is inflated, the new pods enter `Ready-but-not-Available` from the Deployment controller's perspective. @@ -351,23 +347,23 @@ The controller must use the original `minReadySeconds` saved in the Deployment a `Finalize` restores the Deployment to its pre-rollout state. It performs: 1. If the Deployment object is `nil` (deleted), no-op. -2. If none of the four annotations is present, the Deployment is already in idle state — no-op. +2. If none of the three annotations is present, the Deployment is already in idle state — no-op. 3. **Parse annotations** (`parseOriginalDeploymentStrategy`): | Annotation state | Parse result | Behavior | |---|---|---| -| All four present and parseable | Restored field values (with `nil` indicating "user relied on default") | Normal Finalize. | +| All three present and parseable | Restored field values | Normal Finalize. | | Any one fails to parse (corrupt format) | Error | `MinReadyDegraded`. | | Partial annotations missing | Error | `MinReadyDegraded`. | -| All four missing | — | No-op (already idle). | +| All three missing | — | No-op (already idle). | 4. **Field restoration** (`applyOriginalDeploymentStrategy`): - - `minReadySeconds`: `nil` → set to `0` (Kubernetes default); non-nil → restore original. - - `progressDeadlineSeconds`: `nil` → clear pointer (Kubernetes default `600`s applies); non-nil → restore original. - - If both `maxUnavailable` and `maxSurge` are `nil`, clear the entire `RollingUpdate` block (Kubernetes default applies). - - Otherwise, restore each field individually. + - `minReadySeconds`: restore the parsed integer value. + - `progressDeadlineSeconds`: restore the parsed integer value. 
+ - `maxUnavailable`: restore the parsed int-or-percent value. + - `maxSurge`: not restored from MinReady annotations and not mutated by MinReady finalization. -5. Delete all four annotations and `Patch` atomically. +5. Delete all three annotations and `Patch` atomically. **Why Degraded refuses to silently fall back to Kubernetes defaults**: A user whose original `maxUnavailable` was `50%` and whose annotations were corrupted should not be silently downgraded to the Kubernetes default `25%`. The release-rate change is operationally significant and should be surfaced for human review, not masked. @@ -404,19 +400,25 @@ The state determination is always based on **observable Deployment state**, neve A feature-gated guard is added to `pkg/webhook/workload/mutating/workload_update_handler.go`: ```go -func shouldSkipRecreateMutationForMinReady(rollout *appsv1beta1.Rollout) bool { - return rollout.Spec.Strategy.Canary != nil && - !rollout.Spec.Strategy.Canary.EnableExtraWorkloadForCanary && - utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) +func isMinReadySecondsStrategy(rollout *appsv1beta1.Rollout, deployment *apps.Deployment) bool { + if rollout.Spec.Strategy.Canary == nil || + rollout.Spec.Strategy.Canary.EnableExtraWorkloadForCanary { + return false + } + if utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) { + return true + } + strategy := util.GetDeploymentStrategy(deployment) + return strings.EqualFold(string(strategy.RollingStyle), string(appsv1alpha1.PartitionRollingStyle)) } ``` -The guard splits the mutation into two paths. `shouldSkipRecreateMutationForMinReady` only checks `Canary` because a Rollout cannot declare both `BlueGreen` and `Canary` — the validating webhook rejects that combination — so with `BlueGreen==nil` guaranteed, the guard is equivalent to the executor's `GetRollingStyle()==Partition` routing and both sides agree on MinReady. +The guard splits the mutation into two paths. 
`isMinReadySecondsStrategy` only checks `Canary` because a Rollout cannot declare both `BlueGreen` and `Canary` — the validating webhook rejects that combination. When the feature gate is disabled mid-rollout, the Deployment's `DeploymentStrategyAnnotation` keeps the webhook symmetric with the executor's MinReady annotation fallback. **Enrollment path (workload entering progressing).** Instead of pausing the Deployment, the webhook synchronously snapshots the original strategy fields into annotations and inflates `minReadySeconds` / `progressDeadlineSeconds` / `maxUnavailable` in place via `EnrollMinReadyDeployment`: ```go -if shouldSkipRecreateMutationForMinReady(rollout) { +if isMinReadySecondsStrategy(rollout, deployment) { // MinReady keeps the native controller running, so it must NOT be paused. // Inflate synchronously at admission time so the native controller never // observes the user's original budget in the window between admission and @@ -547,7 +549,7 @@ This is an accepted alpha limitation. Manual recovery without the controller: kubectl get deploy -n \ -o jsonpath='{.metadata.annotations.rollouts\.kruise\.io/original-min-ready-seconds}{"\n"}{.metadata.annotations.rollouts\.kruise\.io/original-max-unavailable}{"\n"}' ``` - A value of `__k8s_default__` means the field was unset originally (restore by removing it). + If a field was unset originally, the annotation contains the Kubernetes API default value (`600` or `25%`). 2. Restore the original strategy fields and clear the rollout control annotation: ```bash kubectl patch deploy -n --type merge -p '{ @@ -557,7 +559,6 @@ This is an accepted alpha limitation. 
Manual recovery without the controller: "rollouts.kruise.io/original-min-ready-seconds": null, "rollouts.kruise.io/original-progress-deadline-seconds": null, "rollouts.kruise.io/original-max-unavailable": null, - "rollouts.kruise.io/original-max-surge": null, "rollouts.kruise.io/batch-release-control": null }}}' ``` The native Deployment controller then resumes a normal rolling update to completion. @@ -600,6 +601,6 @@ Users opt in by enabling the feature gate on the kruise-rollout controller. - [ ] Q2 2026 (GSoC weeks 1–6): MinReadyControl core implementation (Initialize / UpgradeBatch / CalculateBatchContext / Finalize) with unit tests - [ ] Q3 2026 (GSoC weeks 7–8): Webhook invariant enforcement and feature-gated strategy selection - [ ] Q3 2026 (GSoC weeks 9–10): End-to-end tests covering the five core scenarios -- [ ] Q3 2026 (GSoC weeks 11–12): PDB coexistence, maxSurge preservation edge cases, documentation +- [ ] Q3 2026 (GSoC weeks 11–12): PDB coexistence and documentation - [ ] TBD: Observability follow-up (status conditions, events, Prometheus metrics) - [ ] TBD: Plan B (custom `ReadinessGate`) if future requirements need PDB-aware workload availability semantics diff --git a/pkg/controller/batchrelease/batchrelease_controller.go b/pkg/controller/batchrelease/batchrelease_controller.go index 3b93f459..f314cabf 100644 --- a/pkg/controller/batchrelease/batchrelease_controller.go +++ b/pkg/controller/batchrelease/batchrelease_controller.go @@ -160,7 +160,7 @@ type BatchReleaseReconciler struct { // and what is in the Rollout.Spec func (r *BatchReleaseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { release := new(v1beta1.BatchRelease) - err := r.Get(context.TODO(), req.NamespacedName, release) + err := r.Get(ctx, req.NamespacedName, release) if err != nil { if errors.IsNotFound(err) { // Object not found, return. Created objects are automatically garbage collected. 
@@ -201,7 +201,7 @@ func (r *BatchReleaseReconciler) Reconcile(ctx context.Context, req ctrl.Request // executor start to execute the batch release plan. startTimestamp := time.Now() - result, currentStatus, err := r.executor.Do(release) + result, currentStatus, err := r.executor.Do(ctx, release) if err != nil { errList = append(errList, field.InternalError(field.NewPath("do-release"), err)) } diff --git a/pkg/controller/batchrelease/batchrelease_controller_test.go b/pkg/controller/batchrelease/batchrelease_controller_test.go index 87db3cb4..4147a032 100644 --- a/pkg/controller/batchrelease/batchrelease_controller_test.go +++ b/pkg/controller/batchrelease/batchrelease_controller_test.go @@ -841,7 +841,7 @@ func TestExecutorFallsBackToRecreateWhenMinReadyFeatureGateDisabled(t *testing.T WithStatusSubresource(&v1beta1.BatchRelease{}). Build() - controller, err := NewReleasePlanExecutor(cli, rec).getReleaseController(release, release.Status.DeepCopy()) + controller, err := NewReleasePlanExecutor(cli, rec).getReleaseController(context.Background(), release, release.Status.DeepCopy()) if err != nil { t.Fatalf("getReleaseController failed: %v", err) } @@ -869,7 +869,7 @@ func TestMinReadyControlPlaneRecordsInitializedConditionAndEvent(t *testing.T) { WithStatusSubresource(&v1beta1.BatchRelease{}). Build() status := release.Status.DeepCopy() - controller, err := NewReleasePlanExecutor(cli, rec).getReleaseController(release, status) + controller, err := NewReleasePlanExecutor(cli, rec).getReleaseController(context.Background(), release, status) if err != nil { t.Fatalf("getReleaseController failed: %v", err) } @@ -900,7 +900,7 @@ func TestMinReadyControlPlaneAllowsPDBCoexistence(t *testing.T) { WithStatusSubresource(&v1beta1.BatchRelease{}). 
Build() status := release.Status.DeepCopy() - controller, err := NewReleasePlanExecutor(cli, rec).getReleaseController(release, status) + controller, err := NewReleasePlanExecutor(cli, rec).getReleaseController(context.Background(), release, status) if err != nil { t.Fatalf("getReleaseController failed: %v", err) } diff --git a/pkg/controller/batchrelease/batchrelease_executor.go b/pkg/controller/batchrelease/batchrelease_executor.go index e934c6c0..aeb1bdd4 100644 --- a/pkg/controller/batchrelease/batchrelease_executor.go +++ b/pkg/controller/batchrelease/batchrelease_executor.go @@ -71,7 +71,7 @@ func NewReleasePlanExecutor(cli client.Client, recorder record.EventRecorder) *E } // Do execute the release plan -func (r *Executor) Do(release *v1beta1.BatchRelease) (reconcile.Result, *v1beta1.BatchReleaseStatus, error) { +func (r *Executor) Do(ctx context.Context, release *v1beta1.BatchRelease) (reconcile.Result, *v1beta1.BatchReleaseStatus, error) { klog.InfoS("Starting one round of reconciling release plan", "BatchRelease", client.ObjectKeyFromObject(release), "phase", release.Status.Phase, @@ -79,7 +79,7 @@ func (r *Executor) Do(release *v1beta1.BatchRelease) (reconcile.Result, *v1beta1 "current-batch-state", release.Status.CanaryStatus.CurrentBatchState) newStatus := getInitializedStatus(&release.Status) - workloadController, err := r.getReleaseController(release, newStatus) + workloadController, err := r.getReleaseController(ctx, release, newStatus) if err != nil || workloadController == nil { return reconcile.Result{}, nil, nil } @@ -197,7 +197,7 @@ func (r *Executor) progressBatches(release *v1beta1.BatchRelease, newStatus *v1b } // GetWorkloadController pick the right workload controller to work on the workload -func (r *Executor) getReleaseController(release *v1beta1.BatchRelease, newStatus *v1beta1.BatchReleaseStatus) (control.Interface, error) { +func (r *Executor) getReleaseController(ctx context.Context, release *v1beta1.BatchRelease, newStatus 
*v1beta1.BatchReleaseStatus) (control.Interface, error) { targetRef := release.Spec.WorkloadRef gvk := schema.FromAPIVersionAndKind(targetRef.APIVersion, targetRef.Kind) if !util.IsSupportedWorkload(gvk) { @@ -236,15 +236,15 @@ func (r *Executor) getReleaseController(release *v1beta1.BatchRelease, newStatus case v1beta1.PartitionRollingStyle, "": if targetRef.APIVersion == appsv1alpha1.GroupVersion.String() && targetRef.Kind == reflect.TypeOf(appsv1alpha1.DaemonSet{}).Name() { klog.InfoS("Using DaemonSet partition-style release controller for this batch release", "workload name", targetKey.Name, "namespace", targetKey.Namespace) - return partitionstyle.NewControlPlane(daemonset.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil + return partitionstyle.NewControlPlane(ctx, daemonset.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil } if targetRef.APIVersion == apps.SchemeGroupVersion.String() && targetRef.Kind == reflect.TypeOf(apps.DaemonSet{}).Name() { klog.InfoS("Using Native DaemonSet partition-style release controller for this batch release", "workload name", targetKey.Name, "namespace", targetKey.Namespace) - return partitionstyle.NewControlPlane(nativedaemonset.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil + return partitionstyle.NewControlPlane(ctx, nativedaemonset.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil } if targetRef.APIVersion == appsv1alpha1.GroupVersion.String() && targetRef.Kind == reflect.TypeOf(appsv1alpha1.CloneSet{}).Name() { klog.InfoS("Using CloneSet partition-style release controller for this batch release", "workload name", targetKey.Name, "namespace", targetKey.Namespace) - return partitionstyle.NewControlPlane(cloneset.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil + return partitionstyle.NewControlPlane(ctx, cloneset.NewController, r.client, r.recorder, release, newStatus, targetKey, 
gvk), nil } if targetRef.APIVersion == apps.SchemeGroupVersion.String() && targetRef.Kind == reflect.TypeOf(apps.Deployment{}).Name() { // Route to the MinReady controller when the feature gate is enabled, or @@ -254,19 +254,19 @@ func (r *Executor) getReleaseController(release *v1beta1.BatchRelease, newStatus // Deployment as under its control, leaving the workload stuck in a // half-initialized state. Keeping MinReady control lets it finalize and // restore the original fields. - if utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) || r.deploymentHasMinReadyAnnotations(targetKey) { + if utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) || r.deploymentHasMinReadyAnnotations(ctx, targetKey) { klog.InfoS("Using Deployment MinReadySeconds partition-style release controller for this batch release", "workload name", targetKey.Name, "namespace", targetKey.Namespace) - return partitionstyle.NewControlPlane(partitiondeployment.NewMinReadyController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil + return partitionstyle.NewControlPlane(ctx, partitiondeployment.NewMinReadyController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil } klog.InfoS("Using Deployment partition-style release controller for this batch release", "workload name", targetKey.Name, "namespace", targetKey.Namespace) - return partitionstyle.NewControlPlane(partitiondeployment.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil + return partitionstyle.NewControlPlane(ctx, partitiondeployment.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil } klog.Info("Partition, but use StatefulSet-Like partition-style release controller for this batch release") } // try to use StatefulSet-like rollout controller by default klog.InfoS("Using StatefulSet-Like partition-style release controller for this batch release", "workload name", targetKey.Name, "namespace", targetKey.Namespace) - return 
partitionstyle.NewControlPlane(statefulset.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil + return partitionstyle.NewControlPlane(ctx, statefulset.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil } // deploymentHasMinReadyAnnotations reports whether the target Deployment still @@ -274,9 +274,9 @@ func (r *Executor) getReleaseController(release *v1beta1.BatchRelease, newStatus // MinReady controller and not yet finalized. Used to keep MinReady routing when // the feature gate is disabled mid-rollout. A fetch failure (e.g. NotFound) // returns false so routing falls back to the default controller. -func (r *Executor) deploymentHasMinReadyAnnotations(key types.NamespacedName) bool { +func (r *Executor) deploymentHasMinReadyAnnotations(ctx context.Context, key types.NamespacedName) bool { deployment := &apps.Deployment{} - if err := r.client.Get(context.TODO(), key, deployment); err != nil { + if err := r.client.Get(ctx, key, deployment); err != nil { return false } return v1beta1.HasMinReadyOriginalAnnotations(deployment.Annotations) diff --git a/pkg/controller/batchrelease/control/partitionstyle/cloneset/control.go b/pkg/controller/batchrelease/control/partitionstyle/cloneset/control.go index aef389fa..ce09280d 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/cloneset/control.go +++ b/pkg/controller/batchrelease/control/partitionstyle/cloneset/control.go @@ -78,7 +78,7 @@ func (rc *realController) ListOwnedPods() ([]*corev1.Pod, error) { return rc.pods, err } -func (rc *realController) Initialize(release *v1beta1.BatchRelease) error { +func (rc *realController) Initialize(ctx context.Context, release *v1beta1.BatchRelease) error { if control.IsControlledByBatchRelease(release, rc.object) { return nil } @@ -87,21 +87,21 @@ func (rc *realController) Initialize(release *v1beta1.BatchRelease) error { owner := control.BuildReleaseControlInfo(release) body := 
fmt.Sprintf(`{"metadata":{"annotations":{"%s":"%s"}},"spec":{"updateStrategy":{"paused":%v,"partition":"%s"}}}`, util.BatchReleaseControlAnnotation, owner, false, "100%") - return rc.client.Patch(context.TODO(), clone, client.RawPatch(types.MergePatchType, []byte(body))) + return rc.client.Patch(ctx, clone, client.RawPatch(types.MergePatchType, []byte(body))) } -func (rc *realController) UpgradeBatch(ctx *batchcontext.BatchContext) error { +func (rc *realController) UpgradeBatch(ctx context.Context, batchContext *batchcontext.BatchContext) error { var body string var desired int - switch partition := ctx.DesiredPartition; partition.Type { + switch partition := batchContext.DesiredPartition; partition.Type { case intstr.Int: desired = int(partition.IntVal) body = fmt.Sprintf(`{"spec":{"updateStrategy":{"partition": %d }}}`, partition.IntValue()) case intstr.String: - desired, _ = intstr.GetScaledValueFromIntOrPercent(&partition, int(ctx.Replicas), true) + desired, _ = intstr.GetScaledValueFromIntOrPercent(&partition, int(batchContext.Replicas), true) body = fmt.Sprintf(`{"spec":{"updateStrategy":{"partition":"%s"}}}`, partition.String()) } - current, _ := intstr.GetScaledValueFromIntOrPercent(&ctx.CurrentPartition, int(ctx.Replicas), true) + current, _ := intstr.GetScaledValueFromIntOrPercent(&batchContext.CurrentPartition, int(batchContext.Replicas), true) // current less than desired, which means current revision replicas will be less than desired, // in other word, update revision replicas will be more than desired, no need to update again. 
@@ -110,10 +110,10 @@ func (rc *realController) UpgradeBatch(ctx *batchcontext.BatchContext) error { } clone := util.GetEmptyObjectWithKey(rc.object) - return rc.client.Patch(context.TODO(), clone, client.RawPatch(types.MergePatchType, []byte(body))) + return rc.client.Patch(ctx, clone, client.RawPatch(types.MergePatchType, []byte(body))) } -func (rc *realController) Finalize(release *v1beta1.BatchRelease) error { +func (rc *realController) Finalize(ctx context.Context, release *v1beta1.BatchRelease) error { if rc.object == nil { return nil } @@ -134,7 +134,7 @@ func (rc *realController) Finalize(release *v1beta1.BatchRelease) error { body := fmt.Sprintf(`{"metadata":{"annotations":{"%s":null}}%s}`, util.BatchReleaseControlAnnotation, specBody) clone := util.GetEmptyObjectWithKey(rc.object) - return rc.client.Patch(context.TODO(), clone, client.RawPatch(types.MergePatchType, []byte(body))) + return rc.client.Patch(ctx, clone, client.RawPatch(types.MergePatchType, []byte(body))) } func (rc *realController) CalculateBatchContext(release *v1beta1.BatchRelease) (*batchcontext.BatchContext, error) { diff --git a/pkg/controller/batchrelease/control/partitionstyle/cloneset/control_test.go b/pkg/controller/batchrelease/control/partitionstyle/cloneset/control_test.go index 28a4dce7..17968159 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/cloneset/control_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/cloneset/control_test.go @@ -292,7 +292,7 @@ func TestRealController(t *testing.T) { controller, err := c.BuildController() Expect(err).NotTo(HaveOccurred()) - err = controller.Initialize(release) + err = controller.Initialize(context.Background(), release) Expect(err).NotTo(HaveOccurred()) fetch := &kruiseappsv1alpha1.CloneSet{} Expect(cli.Get(context.TODO(), cloneKey, fetch)).NotTo(HaveOccurred()) @@ -303,7 +303,7 @@ func TestRealController(t *testing.T) { for { batchContext, err := controller.CalculateBatchContext(release) 
Expect(err).NotTo(HaveOccurred()) - err = controller.UpgradeBatch(batchContext) + err = controller.UpgradeBatch(context.Background(), batchContext) fetch = &kruiseappsv1alpha1.CloneSet{} // mock Expect(cli.Get(context.TODO(), cloneKey, fetch)).NotTo(HaveOccurred()) @@ -316,7 +316,7 @@ func TestRealController(t *testing.T) { Expect(cli.Get(context.TODO(), cloneKey, fetch)).NotTo(HaveOccurred()) Expect(fetch.Spec.UpdateStrategy.Partition.StrVal).Should(Equal("90%")) - err = controller.Finalize(release) + err = controller.Finalize(context.Background(), release) Expect(err).NotTo(HaveOccurred()) fetch = &kruiseappsv1alpha1.CloneSet{} Expect(cli.Get(context.TODO(), cloneKey, fetch)).NotTo(HaveOccurred()) @@ -445,7 +445,7 @@ func TestFinalize(t *testing.T) { t.Fatalf("BuildController failed: %s", err.Error()) } cs.featureGateFunc() - err = c.Finalize(br) + err = c.Finalize(context.Background(), br) if err != nil { t.Fatalf("BuildController failed: %s", err.Error()) } diff --git a/pkg/controller/batchrelease/control/partitionstyle/control_plane.go b/pkg/controller/batchrelease/control/partitionstyle/control_plane.go index d921cb22..f341fd6c 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/control_plane.go +++ b/pkg/controller/batchrelease/control/partitionstyle/control_plane.go @@ -41,6 +41,7 @@ type realBatchControlPlane struct { client.Client record.EventRecorder patcher labelpatch.LabelPatcher + ctx context.Context release *v1beta1.BatchRelease newStatus *v1beta1.BatchReleaseStatus } @@ -48,31 +49,43 @@ type realBatchControlPlane struct { type NewInterfaceFunc func(cli client.Client, key types.NamespacedName, gvk schema.GroupVersionKind) Interface // NewControlPlane creates a new release controller with partitioned-style to drive batch release state machine -func NewControlPlane(f NewInterfaceFunc, cli client.Client, recorder record.EventRecorder, release *v1beta1.BatchRelease, newStatus *v1beta1.BatchReleaseStatus, key types.NamespacedName, gvk 
schema.GroupVersionKind) *realBatchControlPlane { +func NewControlPlane(ctx context.Context, f NewInterfaceFunc, cli client.Client, recorder record.EventRecorder, release *v1beta1.BatchRelease, newStatus *v1beta1.BatchReleaseStatus, key types.NamespacedName, gvk schema.GroupVersionKind) *realBatchControlPlane { return &realBatchControlPlane{ Client: cli, EventRecorder: recorder, newStatus: newStatus, Interface: f(cli, key, gvk), + ctx: nonNilContext(ctx), release: release.DeepCopy(), patcher: labelpatch.NewLabelPatcher(cli, klog.KObj(release), release.Spec.ReleasePlan.Batches), } } +func nonNilContext(ctx context.Context) context.Context { + if ctx != nil { + return ctx + } + return context.Background() +} + func (rc *realBatchControlPlane) Initialize() error { + minReady := isMinReadyController(rc.Interface) controller, err := rc.BuildController() if err != nil { - rc.recordMinReadyDegraded("MinReadyInitializeFailed", err) + rc.recordMinReadyDegradedOrLog(minReady, "MinReadyInitializeFailed", err) return err } + minReady = isMinReadyController(controller) // claim workload under our control - err = controller.Initialize(rc.release) + err = controller.Initialize(rc.ctx, rc.release) if err != nil { - rc.recordMinReadyDegraded("MinReadyInitializeFailed", err) + rc.recordMinReadyDegradedOrLog(minReady, "MinReadyInitializeFailed", err) return err } - rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyInitialized, "MinReadyInitialized", "MinReadySeconds strategy initialized") + if minReady { + rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyInitialized, "MinReadyInitialized", "MinReadySeconds strategy initialized") + } // record revision and replicas workloadInfo := controller.GetWorkloadInfo() @@ -89,54 +102,64 @@ func (rc *realBatchControlPlane) Initialize() error { } func (rc *realBatchControlPlane) UpgradeBatch() error { + minReady := isMinReadyController(rc.Interface) controller, err := rc.BuildController() if err != nil { - 
rc.recordMinReadyDegraded("MinReadyBatchingFailed", err) + rc.recordMinReadyDegradedOrLog(minReady, "MinReadyBatchingFailed", err) return err } + minReady = isMinReadyController(controller) if controller.GetWorkloadInfo().Replicas == 0 { - rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatching", "MinReadySeconds strategy has no replicas to upgrade") + if minReady { + rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatching", "MinReadySeconds strategy has no replicas to upgrade") + } return nil } err = rc.countAndUpdateNoNeedUpdateReplicas() if err != nil { - rc.recordMinReadyDegraded("MinReadyBatchingFailed", err) + rc.recordMinReadyDegradedOrLog(minReady, "MinReadyBatchingFailed", err) return err } batchContext, err := controller.CalculateBatchContext(rc.release) if err != nil { - rc.recordMinReadyDegraded("MinReadyBatchingFailed", err) + rc.recordMinReadyDegradedOrLog(minReady, "MinReadyBatchingFailed", err) return err } klog.Infof("BatchRelease %v calculated context when upgrade batch: %s", klog.KObj(rc.release), batchContext.Log()) - err = controller.UpgradeBatch(batchContext) + err = controller.UpgradeBatch(rc.ctx, batchContext) if err != nil { - rc.recordMinReadyDegraded("MinReadyBatchingFailed", err) + rc.recordMinReadyDegradedOrLog(minReady, "MinReadyBatchingFailed", err) return err } if err := rc.patcher.PatchPodBatchLabel(batchContext); err != nil { - rc.recordMinReadyDegraded("MinReadyBatchingFailed", err) + rc.recordMinReadyDegradedOrLog(minReady, "MinReadyBatchingFailed", err) return err } - rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatching", "MinReadySeconds strategy advanced the current batch") + if minReady { + rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatching", "MinReadySeconds strategy advanced the current batch") + } return nil } func (rc *realBatchControlPlane) EnsureBatchPodsReadyAndLabeled() error { + minReady := 
isMinReadyController(rc.Interface) controller, err := rc.BuildController() if err != nil { - rc.recordMinReadyDegraded("MinReadyBatchingFailed", err) + rc.recordMinReadyDegradedOrLog(minReady, "MinReadyBatchingFailed", err) return err } + minReady = isMinReadyController(controller) if controller.GetWorkloadInfo().Replicas == 0 { - rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatchReady", "MinReadySeconds strategy batch is ready") + if minReady { + rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatchReady", "MinReadySeconds strategy batch is ready") + } return nil } @@ -144,7 +167,7 @@ func (rc *realBatchControlPlane) EnsureBatchPodsReadyAndLabeled() error { // the target calculated should be consistent with UpgradeBatch. batchContext, err := controller.CalculateBatchContext(rc.release) if err != nil { - rc.recordMinReadyDegraded("MinReadyBatchingFailed", err) + rc.recordMinReadyDegradedOrLog(minReady, "MinReadyBatchingFailed", err) return err } @@ -152,29 +175,37 @@ func (rc *realBatchControlPlane) EnsureBatchPodsReadyAndLabeled() error { klog.KObj(rc.release), batchContext.Log()) if err := batchContext.IsBatchReady(); err != nil { - observeMinReadyBatchWait(rc.release, util.GetBatchReleaseCondition(*rc.newStatus, v1beta1.RolloutConditionMinReadyBatching)) + if minReady { + observeMinReadyBatchWait(rc.release, util.GetBatchReleaseCondition(*rc.newStatus, v1beta1.RolloutConditionMinReadyBatching)) + } return err } - rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatchReady", "MinReadySeconds strategy batch is ready") + if minReady { + rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatchReady", "MinReadySeconds strategy batch is ready") + } return nil } func (rc *realBatchControlPlane) Finalize() error { + minReady := isMinReadyController(rc.Interface) controller, err := rc.BuildController() if err != nil { if err := client.IgnoreNotFound(err); err != nil 
{ - rc.recordMinReadyDegraded("MinReadyFinalizeFailed", err) + rc.recordMinReadyDegradedOrLog(minReady, "MinReadyFinalizeFailed", err) return err } return nil } + minReady = isMinReadyController(controller) // release workload control info and clean up resources if it needs - if err := controller.Finalize(rc.release); err != nil { - rc.recordMinReadyDegraded("MinReadyFinalizeFailed", err) + if err := controller.Finalize(rc.ctx, rc.release); err != nil { + rc.recordMinReadyDegradedOrLog(minReady, "MinReadyFinalizeFailed", err) return err } - rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyFinalized, "MinReadyFinalized", "MinReadySeconds strategy finalized") + if minReady { + rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyFinalized, "MinReadyFinalized", "MinReadySeconds strategy finalized") + } return nil } @@ -274,7 +305,7 @@ func (rc *realBatchControlPlane) markNoNeedUpdatePodsIfNeeds() (*int32, error) { for _, pod := range filterPods { clone := util.GetEmptyObjectWithKey(pod) body := fmt.Sprintf(`{"metadata":{"labels":{"%s":"%s"}}}`, util.NoNeedUpdatePodLabel, rolloutID) - err = rc.Patch(context.TODO(), clone, client.RawPatch(types.StrategicMergePatchType, []byte(body))) + err = rc.Patch(rc.ctx, clone, client.RawPatch(types.StrategicMergePatchType, []byte(body))) if err != nil { klog.Errorf("Failed to patch no-need-update label(%v) to pod %v, err: %v", rolloutID, klog.KObj(pod), err) return &noNeedUpdateReplicas, err diff --git a/pkg/controller/batchrelease/control/partitionstyle/daemonset/control.go b/pkg/controller/batchrelease/control/partitionstyle/daemonset/control.go index 90f6c7fd..3c2dceba 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/daemonset/control.go +++ b/pkg/controller/batchrelease/control/partitionstyle/daemonset/control.go @@ -92,7 +92,7 @@ func (rc *realController) ListOwnedPods() ([]*corev1.Pod, error) { return rc.pods, err } -func (rc *realController) Initialize(release *v1beta1.BatchRelease) error { +func (rc 
*realController) Initialize(ctx context.Context, release *v1beta1.BatchRelease) error { if control.IsControlledByBatchRelease(release, rc.object) { return nil } @@ -102,13 +102,13 @@ func (rc *realController) Initialize(release *v1beta1.BatchRelease) error { body := fmt.Sprintf(`{"metadata":{"annotations":{"%s":"%s"}},"spec":{"updateStrategy":{"rollingUpdate":{"paused":%v,"partition":%d}}}}`, util.BatchReleaseControlAnnotation, owner, false, rc.Replicas) - return rc.client.Patch(context.TODO(), daemon, client.RawPatch(types.MergePatchType, []byte(body))) + return rc.client.Patch(ctx, daemon, client.RawPatch(types.MergePatchType, []byte(body))) } -func (rc *realController) UpgradeBatch(ctx *batchcontext.BatchContext) error { +func (rc *realController) UpgradeBatch(ctx context.Context, batchContext *batchcontext.BatchContext) error { - desired := ctx.DesiredPartition.IntVal - current := ctx.CurrentPartition.IntVal + desired := batchContext.DesiredPartition.IntVal + current := batchContext.CurrentPartition.IntVal // current less than desired, which means current revision replicas will be less than desired, // in other word, update revision replicas will be more than desired, no need to update again. 
if current <= desired { @@ -118,10 +118,10 @@ func (rc *realController) UpgradeBatch(ctx *batchcontext.BatchContext) error { body := fmt.Sprintf(`{"spec":{"updateStrategy":{"rollingUpdate":{"partition":%d}}}}`, desired) daemon := util.GetEmptyObjectWithKey(rc.object) - return rc.client.Patch(context.TODO(), daemon, client.RawPatch(types.MergePatchType, []byte(body))) + return rc.client.Patch(ctx, daemon, client.RawPatch(types.MergePatchType, []byte(body))) } -func (rc *realController) Finalize(release *v1beta1.BatchRelease) error { +func (rc *realController) Finalize(ctx context.Context, release *v1beta1.BatchRelease) error { if rc.object == nil { return nil } @@ -137,7 +137,7 @@ func (rc *realController) Finalize(release *v1beta1.BatchRelease) error { } body := fmt.Sprintf(`{"metadata":{"annotations":{"%s":null}}%s}`, util.BatchReleaseControlAnnotation, specBody) daemon := util.GetEmptyObjectWithKey(rc.object) - return rc.client.Patch(context.TODO(), daemon, client.RawPatch(types.MergePatchType, []byte(body))) + return rc.client.Patch(ctx, daemon, client.RawPatch(types.MergePatchType, []byte(body))) } func (rc *realController) CalculateBatchContext(release *v1beta1.BatchRelease) (*batchcontext.BatchContext, error) { diff --git a/pkg/controller/batchrelease/control/partitionstyle/daemonset/control_test.go b/pkg/controller/batchrelease/control/partitionstyle/daemonset/control_test.go index 1b42041a..fa98bebc 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/daemonset/control_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/daemonset/control_test.go @@ -341,7 +341,7 @@ func TestRealController(t *testing.T) { controller, err := c.BuildController() Expect(err).NotTo(HaveOccurred()) - err = controller.Initialize(release) + err = controller.Initialize(context.Background(), release) Expect(err).NotTo(HaveOccurred()) fetch := &kruiseappsv1alpha1.DaemonSet{} @@ -356,7 +356,7 @@ func TestRealController(t *testing.T) { for { batchContext, err := 
controller.CalculateBatchContext(release) Expect(err).NotTo(HaveOccurred()) - err = controller.UpgradeBatch(batchContext) + err = controller.UpgradeBatch(context.Background(), batchContext) fetch = &kruiseappsv1alpha1.DaemonSet{} // mock Expect(cli.Get(context.TODO(), daemonKey, fetch)).NotTo(HaveOccurred()) @@ -371,7 +371,7 @@ func TestRealController(t *testing.T) { fmt.Println(*fetch.Spec.UpdateStrategy.RollingUpdate.Partition) Expect(*fetch.Spec.UpdateStrategy.RollingUpdate.Partition).Should(Equal(int32(9))) - err = controller.Finalize(release) + err = controller.Finalize(context.Background(), release) Expect(err).NotTo(HaveOccurred()) fetch = &kruiseappsv1alpha1.DaemonSet{} Expect(cli.Get(context.TODO(), daemonKey, fetch)).NotTo(HaveOccurred()) diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/control.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/control.go index 4623e6bd..da3690a7 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/control.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/control.go @@ -89,7 +89,7 @@ func (rc *realController) ListOwnedPods() ([]*corev1.Pod, error) { return rc.pods, err } -func (rc *realController) Initialize(release *v1beta1.BatchRelease) error { +func (rc *realController) Initialize(ctx context.Context, release *v1beta1.BatchRelease) error { if deploymentutil.IsUnderRolloutControl(rc.object) { return nil // No need initialize again. 
} @@ -118,10 +118,10 @@ func (rc *realController) Initialize(release *v1beta1.BatchRelease) error { // Disable the native deployment controller patchData.UpdatePaused(true) patchData.UpdateStrategy(apps.DeploymentStrategy{Type: apps.RecreateDeploymentStrategyType}) - return rc.client.Patch(context.TODO(), d, patchData) + return rc.client.Patch(ctx, d, patchData) } -func (rc *realController) UpgradeBatch(ctx *batchcontext.BatchContext) error { +func (rc *realController) UpgradeBatch(ctx context.Context, batchContext *batchcontext.BatchContext) error { if !deploymentutil.IsUnderRolloutControl(rc.object) { klog.Warningf("Cannot upgrade batch, because "+ "deployment %v has ridden out of our control", klog.KObj(rc.object)) @@ -129,18 +129,18 @@ func (rc *realController) UpgradeBatch(ctx *batchcontext.BatchContext) error { } strategy := util.GetDeploymentStrategy(rc.object) - if control.IsCurrentMoreThanOrEqualToDesired(strategy.Partition, ctx.DesiredPartition) { + if control.IsCurrentMoreThanOrEqualToDesired(strategy.Partition, batchContext.DesiredPartition) { return nil // Satisfied, no need patch again. } d := rc.object.DeepCopy() - strategy.Partition = ctx.DesiredPartition + strategy.Partition = batchContext.DesiredPartition patchData := patch.NewDeploymentPatch() patchData.InsertAnnotation(v1alpha1.DeploymentStrategyAnnotation, util.DumpJSON(&strategy)) - return rc.client.Patch(context.TODO(), d, patchData) + return rc.client.Patch(ctx, d, patchData) } -func (rc *realController) Finalize(release *v1beta1.BatchRelease) error { +func (rc *realController) Finalize(ctx context.Context, release *v1beta1.BatchRelease) error { if rc.object == nil { return nil // No need to finalize again. 
} @@ -169,7 +169,7 @@ func (rc *realController) Finalize(release *v1beta1.BatchRelease) error { } d := rc.object.DeepCopy() patchData.DeleteAnnotation(util.BatchReleaseControlAnnotation) - return rc.client.Patch(context.TODO(), d, patchData) + return rc.client.Patch(ctx, d, patchData) } func (rc *realController) CalculateBatchContext(release *v1beta1.BatchRelease) (*batchcontext.BatchContext, error) { diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/control_test.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/control_test.go index 7116d7c8..59ddc56d 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/control_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/control_test.go @@ -321,7 +321,7 @@ func TestRealController(t *testing.T) { controller, err := c.BuildController() Expect(err).NotTo(HaveOccurred()) - err = controller.Initialize(release) + err = controller.Initialize(context.Background(), release) Expect(err).NotTo(HaveOccurred()) fetch := &apps.Deployment{} Expect(cli.Get(context.TODO(), deploymentKey, fetch)).NotTo(HaveOccurred()) @@ -335,7 +335,7 @@ func TestRealController(t *testing.T) { for { batchContext, err := controller.CalculateBatchContext(release) Expect(err).NotTo(HaveOccurred()) - err = controller.UpgradeBatch(batchContext) + err = controller.UpgradeBatch(context.Background(), batchContext) fetch := &apps.Deployment{} // mock Expect(cli.Get(context.TODO(), deploymentKey, fetch)).NotTo(HaveOccurred()) @@ -350,7 +350,7 @@ func TestRealController(t *testing.T) { Expect(strategy.Partition.StrVal).Should(Equal("50%")) release.Spec.ReleasePlan.BatchPartition = nil - err = controller.Finalize(release) + err = controller.Finalize(context.Background(), release) Expect(err).NotTo(HaveOccurred()) fetch = &apps.Deployment{} Expect(cli.Get(context.TODO(), deploymentKey, fetch)).NotTo(HaveOccurred()) @@ -498,7 +498,7 @@ func TestFinalize(t *testing.T) { 
t.Fatalf("BuildController failed: %s", err.Error()) } cs.featureGateFunc() - err = c.Finalize(br) + err = c.Finalize(context.Background(), br) if err != nil { t.Fatalf("BuildController failed: %s", err.Error()) } diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_batch_context.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_batch_context.go index 3d60503d..d0540b8f 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_batch_context.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_batch_context.go @@ -50,8 +50,7 @@ func countUpdatedAvailablePods(pods []*corev1.Pod, updateRevision string, minRea if ready == nil || ready.Status != corev1.ConditionTrue { return false } - return ready.LastTransitionTime.Add(time.Duration(minReadySeconds)*time.Second).Before(now) || - ready.LastTransitionTime.Add(time.Duration(minReadySeconds)*time.Second).Equal(now) + return !ready.LastTransitionTime.Add(time.Duration(minReadySeconds) * time.Second).After(now) })) } diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_constants.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_constants.go index 5742f014..569db504 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_constants.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_constants.go @@ -34,27 +34,26 @@ const ( AnnotationOriginalMinReadySeconds = v1beta1.MinReadyOriginalMinReadySecondsAnnotation AnnotationOriginalProgressDeadlineSeconds = v1beta1.MinReadyOriginalProgressDeadlineSecondsAnnotation AnnotationOriginalMaxUnavailable = v1beta1.MinReadyOriginalMaxUnavailableAnnotation - AnnotationOriginalMaxSurge = v1beta1.MinReadyOriginalMaxSurgeAnnotation - AnnotationValueKubernetesDefault = "__k8s_default__" + DefaultProgressDeadlineSeconds int32 = 600 + DefaultMaxUnavailable = "25%" 
InflatedMinReadySeconds int32 = v1beta1.MaxReadySeconds InflatedProgressDeadlineSeconds int32 = v1beta1.MaxProgressSeconds - InflatedMaxSurgeInt int32 = 1 ) var AllOriginalAnnotations = v1beta1.MinReadyOriginalAnnotations func serializeOriginalInt32(value *int32) string { if value == nil { - return AnnotationValueKubernetesDefault + return strconv.FormatInt(int64(DefaultProgressDeadlineSeconds), 10) } return strconv.FormatInt(int64(*value), 10) } func serializeOriginalIntOrString(value *intstr.IntOrString) string { if value == nil { - return AnnotationValueKubernetesDefault + return DefaultMaxUnavailable } if value.Type == intstr.String { return value.StrVal @@ -63,9 +62,12 @@ func serializeOriginalIntOrString(value *intstr.IntOrString) string { } func parseOriginalInt32(annotations map[string]string, key string) (*int32, error) { - raw, err := readOriginalAnnotation(annotations, key) - if err != nil || raw == AnnotationValueKubernetesDefault { - return nil, err + raw, ok := annotations[key] + if !ok { + return nil, fmt.Errorf("annotation %s missing: %w", key, partitionstyle.ErrMinReadyAnnotationInvalid) + } + if raw == "" { + return nil, fmt.Errorf("annotation %s present but empty: %w", key, partitionstyle.ErrMinReadyAnnotationInvalid) } n, err := strconv.ParseInt(raw, 10, 32) if err != nil { @@ -76,9 +78,12 @@ func parseOriginalInt32(annotations map[string]string, key string) (*int32, erro } func parseOriginalIntOrString(annotations map[string]string, key string) (*intstr.IntOrString, error) { - raw, err := readOriginalAnnotation(annotations, key) - if err != nil || raw == AnnotationValueKubernetesDefault { - return nil, err + raw, ok := annotations[key] + if !ok { + return nil, fmt.Errorf("annotation %s missing: %w", key, partitionstyle.ErrMinReadyAnnotationInvalid) + } + if raw == "" { + return nil, fmt.Errorf("annotation %s present but empty: %w", key, partitionstyle.ErrMinReadyAnnotationInvalid) } if strings.HasSuffix(raw, "%") { if _, err := 
strconv.Atoi(strings.TrimSuffix(raw, "%")); err != nil { @@ -95,17 +100,6 @@ func parseOriginalIntOrString(annotations map[string]string, key string) (*intst return &v, nil } -func readOriginalAnnotation(annotations map[string]string, key string) (string, error) { - raw, ok := annotations[key] - if !ok { - return "", fmt.Errorf("annotation %s missing: %w", key, partitionstyle.ErrMinReadyAnnotationInvalid) - } - if raw == "" { - return "", fmt.Errorf("annotation %s present but empty: %w", key, partitionstyle.ErrMinReadyAnnotationInvalid) - } - return raw, nil -} - func hasAnyOriginalAnnotation(annotations map[string]string) bool { return v1beta1.HasMinReadyOriginalAnnotations(annotations) } diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go index b07a3ca2..ec57c0e9 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go @@ -39,6 +39,10 @@ type MinReadyControl struct { *realController } +func (mc *MinReadyControl) IsMinReadyControl() bool { + return true +} + func (mc *MinReadyControl) BuildController() (partitionstyle.Interface, error) { if mc.realController == nil { return nil, fmt.Errorf("MinReadyControl.BuildController: realController is nil") @@ -54,40 +58,42 @@ func (mc *MinReadyControl) BuildController() (partitionstyle.Interface, error) { return &MinReadyControl{realController: rc}, nil } -func (mc *MinReadyControl) Initialize(release *v1beta1.BatchRelease) error { +func (mc *MinReadyControl) Initialize(ctx context.Context, release *v1beta1.BatchRelease) error { if release == nil { return fmt.Errorf("MinReadyControl.Initialize: release is nil") } if err := mc.ensureInitializeAllowed(); err != nil { return fmt.Errorf("MinReadyControl.Initialize: %w", err) } - original := mc.object.DeepCopy() - modified := 
original.DeepCopy() - if err := writeOriginalAnnotations(original, modified); err != nil { - return fmt.Errorf("MinReadyControl.Initialize: %w", err) - } + original := mc.object + modified := mc.object.DeepCopy() if hasAnyOriginalAnnotation(original.Annotations) { + if err := ensureOriginalAnnotations(original); err != nil { + return fmt.Errorf("MinReadyControl.Initialize: %w", err) + } if err := validateInflatedDeploymentStrategy(original); err != nil { return fmt.Errorf("MinReadyControl.Initialize: %w", err) } + } else { + writeOriginalAnnotations(original, modified) } modified.Annotations[util.BatchReleaseControlAnnotation] = util.DumpJSON(metav1.NewControllerRef( release, release.GetObjectKind().GroupVersionKind())) inflateDeploymentStrategy(modified) patch := client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{}) - return mc.client.Patch(context.TODO(), modified, patch) + return mc.client.Patch(ctx, modified, patch) } -func (mc *MinReadyControl) UpgradeBatch(ctx *batchcontext.BatchContext) error { - if err := mc.ensureInflatedDeploymentStrategy(); err != nil { - return fmt.Errorf("MinReadyControl.UpgradeBatch[%d]: %w", ctx.CurrentBatch, err) +func (mc *MinReadyControl) UpgradeBatch(ctx context.Context, batchContext *batchcontext.BatchContext) error { + if err := mc.ensureInflatedDeploymentStrategy(ctx); err != nil { + return fmt.Errorf("MinReadyControl.UpgradeBatch[%d]: %w", batchContext.CurrentBatch, err) } current, err := intstr.GetScaledValueFromIntOrPercent( - mc.object.Spec.Strategy.RollingUpdate.MaxUnavailable, int(ctx.Replicas), true) + mc.object.Spec.Strategy.RollingUpdate.MaxUnavailable, int(batchContext.Replicas), true) if err != nil { - return fmt.Errorf("MinReadyControl.UpgradeBatch[%d]: %w", ctx.CurrentBatch, err) + return fmt.Errorf("MinReadyControl.UpgradeBatch[%d]: %w", batchContext.CurrentBatch, err) } - target := ctx.DesiredUpdatedReplicas + target := batchContext.DesiredUpdatedReplicas if int32(current) == target { 
return nil } @@ -95,18 +101,21 @@ func (mc *MinReadyControl) UpgradeBatch(ctx *batchcontext.BatchContext) error { // maxUnavailable above the batch target is a legal state after a // scale-down (HPA or manual) and also self-heals external tampering; // converge it back to the target instead of reporting degraded drift. - klog.Warningf("MinReadyControl.UpgradeBatch[%d]: deployment %v maxUnavailable=%d exceeds target=%d, reducing it to the target", - ctx.CurrentBatch, klog.KObj(mc.object), current, target) - } - original := mc.object.DeepCopy() - modified := original.DeepCopy() + klog.InfoS("MinReady maxUnavailable exceeds target, reducing", + "batch", batchContext.CurrentBatch, + "deployment", klog.KObj(mc.object), + "maxUnavailable", current, + "target", target) + } + original := mc.object + modified := mc.object.DeepCopy() maxUnavailable := intstr.FromInt(int(target)) modified.Spec.Strategy.RollingUpdate.MaxUnavailable = &maxUnavailable patch := client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{}) - return mc.client.Patch(context.TODO(), modified, patch) + return mc.client.Patch(ctx, modified, patch) } -func (mc *MinReadyControl) Finalize(_ *v1beta1.BatchRelease) error { +func (mc *MinReadyControl) Finalize(ctx context.Context, _ *v1beta1.BatchRelease) error { if mc.object == nil { return nil } @@ -117,12 +126,12 @@ func (mc *MinReadyControl) Finalize(_ *v1beta1.BatchRelease) error { } return nil } - original := mc.object.DeepCopy() + original := mc.object restored, err := parseOriginalDeploymentStrategy(original.Annotations) if err != nil { return fmt.Errorf("MinReadyControl.Finalize: %w", err) } - modified := original.DeepCopy() + modified := mc.object.DeepCopy() applyOriginalDeploymentStrategy(modified, restored) for _, key := range AllOriginalAnnotations { delete(modified.Annotations, key) @@ -130,7 +139,7 @@ func (mc *MinReadyControl) Finalize(_ *v1beta1.BatchRelease) error { delete(modified.Annotations, 
util.BatchReleaseControlAnnotation) delete(modified.Labels, v1alpha1.DeploymentStableRevisionLabel) patch := client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{}) - return mc.client.Patch(context.TODO(), modified, patch) + return mc.client.Patch(ctx, modified, patch) } func (mc *MinReadyControl) CalculateBatchContext(release *v1beta1.BatchRelease) (*batchcontext.BatchContext, error) { @@ -179,28 +188,18 @@ func (mc *MinReadyControl) ensureInitializeAllowed() error { return nil } -func writeOriginalAnnotations(original, modified *apps.Deployment) error { +func ensureOriginalAnnotations(deployment *apps.Deployment) error { + _, err := parseOriginalDeploymentStrategy(deployment.Annotations) + return err +} + +func writeOriginalAnnotations(original, modified *apps.Deployment) { if modified.Annotations == nil { modified.Annotations = map[string]string{} } - if hasAnyOriginalAnnotation(original.Annotations) { - _, err := parseOriginalDeploymentStrategy(original.Annotations) - return err - } modified.Annotations[AnnotationOriginalMinReadySeconds] = serializeOriginalInt32(&original.Spec.MinReadySeconds) modified.Annotations[AnnotationOriginalProgressDeadlineSeconds] = serializeOriginalInt32(original.Spec.ProgressDeadlineSeconds) modified.Annotations[AnnotationOriginalMaxUnavailable] = serializeOriginalIntOrString(originalMaxUnavailable(original)) - modified.Annotations[AnnotationOriginalMaxSurge] = serializeOriginalIntOrString(originalMaxSurge(original)) - return nil -} - -func ensureAllOriginalAnnotations(annotations map[string]string) error { - for _, key := range AllOriginalAnnotations { - if _, err := readOriginalAnnotation(annotations, key); err != nil { - return err - } - } - return nil } func originalMaxUnavailable(deployment *apps.Deployment) *intstr.IntOrString { @@ -210,13 +209,6 @@ func originalMaxUnavailable(deployment *apps.Deployment) *intstr.IntOrString { return deployment.Spec.Strategy.RollingUpdate.MaxUnavailable } -func 
originalMaxSurge(deployment *apps.Deployment) *intstr.IntOrString { - if deployment.Spec.Strategy.RollingUpdate == nil { - return nil - } - return deployment.Spec.Strategy.RollingUpdate.MaxSurge -} - func inflateDeploymentStrategy(deployment *apps.Deployment) { progressDeadlineSeconds := InflatedProgressDeadlineSeconds maxUnavailable := intstr.FromInt(0) @@ -229,7 +221,6 @@ func inflateDeploymentStrategy(deployment *apps.Deployment) { deployment.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{} } deployment.Spec.Strategy.RollingUpdate.MaxUnavailable = &maxUnavailable - applyMaxSurgeValidationFallback(deployment) } // EnrollMinReadyDeployment snapshots the original strategy fields into @@ -243,30 +234,32 @@ func EnrollMinReadyDeployment(deployment *apps.Deployment) error { return err } snapshot := deployment.DeepCopy() - if err := writeOriginalAnnotations(snapshot, deployment); err != nil { - return err - } if hasAnyOriginalAnnotation(snapshot.Annotations) { + if err := ensureOriginalAnnotations(snapshot); err != nil { + return err + } if err := validateInflatedDeploymentStrategy(snapshot); err != nil { return err } + } else { + writeOriginalAnnotations(snapshot, deployment) } inflateDeploymentStrategy(deployment) return nil } -func (mc *MinReadyControl) ensureInflatedDeploymentStrategy() error { +func (mc *MinReadyControl) ensureInflatedDeploymentStrategy(ctx context.Context) error { if err := validateDeploymentStrategyType(mc.object); err != nil { return err } if validateInflatedDeploymentStrategy(mc.object) == nil { return nil } - original := mc.object.DeepCopy() - modified := original.DeepCopy() + original := mc.object + modified := mc.object.DeepCopy() inflateDeploymentStrategy(modified) patch := client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{}) - if err := mc.client.Patch(context.TODO(), modified, patch); err != nil { + if err := mc.client.Patch(ctx, modified, patch); err != nil { return err } mc.object = modified @@ 
-312,33 +305,13 @@ func hasInflatedDeploymentFields(deployment *apps.Deployment) bool { *deployment.Spec.ProgressDeadlineSeconds == InflatedProgressDeadlineSeconds } -func applyMaxSurgeValidationFallback(deployment *apps.Deployment) { - if deployment.Spec.Strategy.RollingUpdate.MaxSurge == nil { - return - } - replicas := int32(1) - if deployment.Spec.Replicas != nil && *deployment.Spec.Replicas > 0 { - replicas = *deployment.Spec.Replicas - } - surge, err := intstr.GetScaledValueFromIntOrPercent(deployment.Spec.Strategy.RollingUpdate.MaxSurge, int(replicas), true) - if err != nil || surge > 0 { - return - } - maxSurge := intstr.FromInt(1) - deployment.Spec.Strategy.RollingUpdate.MaxSurge = &maxSurge -} - type originalDeploymentStrategy struct { minReadySeconds *int32 progressDeadlineSeconds *int32 maxUnavailable *intstr.IntOrString - maxSurge *intstr.IntOrString } func parseOriginalDeploymentStrategy(annotations map[string]string) (*originalDeploymentStrategy, error) { - if err := ensureAllOriginalAnnotations(annotations); err != nil { - return nil, err - } minReadySeconds, err := parseOriginalInt32(annotations, AnnotationOriginalMinReadySeconds) if err != nil { return nil, err @@ -351,15 +324,10 @@ func parseOriginalDeploymentStrategy(annotations map[string]string) (*originalDe if err != nil { return nil, err } - maxSurge, err := parseOriginalIntOrString(annotations, AnnotationOriginalMaxSurge) - if err != nil { - return nil, err - } return &originalDeploymentStrategy{ minReadySeconds: minReadySeconds, progressDeadlineSeconds: progressDeadlineSeconds, maxUnavailable: maxUnavailable, - maxSurge: maxSurge, }, nil } @@ -369,7 +337,8 @@ func applyOriginalDeploymentStrategy(deployment *apps.Deployment, original *orig deployment.Spec.MinReadySeconds = *original.minReadySeconds } deployment.Spec.ProgressDeadlineSeconds = original.progressDeadlineSeconds - if original.maxUnavailable == nil && original.maxSurge == nil { + if original.maxUnavailable == nil && 
(deployment.Spec.Strategy.RollingUpdate == nil || + deployment.Spec.Strategy.RollingUpdate.MaxSurge == nil) { deployment.Spec.Strategy.RollingUpdate = nil return } @@ -377,7 +346,6 @@ func applyOriginalDeploymentStrategy(deployment *apps.Deployment, original *orig deployment.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{} } deployment.Spec.Strategy.RollingUpdate.MaxUnavailable = original.maxUnavailable - deployment.Spec.Strategy.RollingUpdate.MaxSurge = original.maxSurge } // EventDegradedDriftDetected is the warning event reason recorded when diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go index a7d27887..87ee6985 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go @@ -17,6 +17,7 @@ limitations under the License. 
package deployment import ( + "context" "strings" "testing" @@ -41,7 +42,7 @@ func TestMinReadyInitializeWritesOriginalAnnotationsAndInflatesFields(t *testing deployment := newMinReadyDeployment() control := newBuiltMinReadyControl(t, deployment) - if err := control.Initialize(releaseDemo.DeepCopy()); err != nil { + if err := control.Initialize(context.Background(), releaseDemo.DeepCopy()); err != nil { t.Fatalf("Initialize failed: %v", err) } @@ -51,7 +52,6 @@ func TestMinReadyInitializeWritesOriginalAnnotationsAndInflatesFields(t *testing assertAnnotation(t, annotations, AnnotationOriginalMinReadySeconds, "7") assertAnnotation(t, annotations, AnnotationOriginalProgressDeadlineSeconds, "60") assertAnnotation(t, annotations, AnnotationOriginalMaxUnavailable, "25%") - assertAnnotation(t, annotations, AnnotationOriginalMaxSurge, "1") assertAnnotation(t, annotations, util.BatchReleaseControlAnnotation, getControlInfo(releaseDemo)) } @@ -62,12 +62,11 @@ func TestMinReadyInitializeIsIdempotentAndDoesNotOverwriteAnnotations(t *testing AnnotationOriginalMinReadySeconds: "5", AnnotationOriginalProgressDeadlineSeconds: "30", AnnotationOriginalMaxUnavailable: "10%", - AnnotationOriginalMaxSurge: "2", } inflateDeploymentStrategy(deployment) control := newBuiltMinReadyControl(t, deployment) - if err := control.Initialize(releaseDemo.DeepCopy()); err != nil { + if err := control.Initialize(context.Background(), releaseDemo.DeepCopy()); err != nil { t.Fatalf("Initialize failed: %v", err) } @@ -75,7 +74,6 @@ func TestMinReadyInitializeIsIdempotentAndDoesNotOverwriteAnnotations(t *testing assertAnnotation(t, got.Annotations, AnnotationOriginalMinReadySeconds, "5") assertAnnotation(t, got.Annotations, AnnotationOriginalProgressDeadlineSeconds, "30") assertAnnotation(t, got.Annotations, AnnotationOriginalMaxUnavailable, "10%") - assertAnnotation(t, got.Annotations, AnnotationOriginalMaxSurge, "2") assertMinReadyInflated(t, got) } @@ -86,11 +84,10 @@ func 
TestMinReadyInitializeRejectsGitOpsDrift(t *testing.T) { AnnotationOriginalMinReadySeconds: "5", AnnotationOriginalProgressDeadlineSeconds: "30", AnnotationOriginalMaxUnavailable: "10%", - AnnotationOriginalMaxSurge: "2", } control := newBuiltMinReadyControl(t, deployment) - err := control.Initialize(releaseDemo.DeepCopy()) + err := control.Initialize(context.Background(), releaseDemo.DeepCopy()) if err == nil || !strings.Contains(err.Error(), EventDegradedDriftDetected) { t.Fatalf("Initialize error = %v, want drift detected", err) } @@ -104,7 +101,7 @@ func TestMinReadyInitializeRejectsPartialOriginalAnnotations(t *testing.T) { } control := newBuiltMinReadyControl(t, deployment) - err := control.Initialize(releaseDemo.DeepCopy()) + err := control.Initialize(context.Background(), releaseDemo.DeepCopy()) if err == nil || !strings.Contains(err.Error(), AnnotationOriginalProgressDeadlineSeconds) { t.Fatalf("Initialize error = %v, want missing annotation error", err) } @@ -117,11 +114,10 @@ func TestMinReadyInitializeRejectsEmptyOriginalAnnotations(t *testing.T) { AnnotationOriginalMinReadySeconds: "", AnnotationOriginalProgressDeadlineSeconds: "30", AnnotationOriginalMaxUnavailable: "10%", - AnnotationOriginalMaxSurge: "2", } control := newBuiltMinReadyControl(t, deployment) - err := control.Initialize(releaseDemo.DeepCopy()) + err := control.Initialize(context.Background(), releaseDemo.DeepCopy()) if err == nil || !strings.Contains(err.Error(), "present but empty") { t.Fatalf("Initialize error = %v, want empty annotation error", err) } @@ -134,14 +130,13 @@ func TestMinReadyInitializeSerializesKubernetesDefaults(t *testing.T) { deployment.Spec.Strategy.RollingUpdate = nil control := newBuiltMinReadyControl(t, deployment) - if err := control.Initialize(releaseDemo.DeepCopy()); err != nil { + if err := control.Initialize(context.Background(), releaseDemo.DeepCopy()); err != nil { t.Fatalf("Initialize failed: %v", err) } got := fetchMinReadyDeployment(t, control) - 
assertAnnotation(t, got.Annotations, AnnotationOriginalProgressDeadlineSeconds, AnnotationValueKubernetesDefault) - assertAnnotation(t, got.Annotations, AnnotationOriginalMaxUnavailable, AnnotationValueKubernetesDefault) - assertAnnotation(t, got.Annotations, AnnotationOriginalMaxSurge, AnnotationValueKubernetesDefault) + assertAnnotation(t, got.Annotations, AnnotationOriginalProgressDeadlineSeconds, "600") + assertAnnotation(t, got.Annotations, AnnotationOriginalMaxUnavailable, "25%") assertMinReadyInflatedWithoutSurgeRequirement(t, got) } @@ -149,7 +144,7 @@ func TestMinReadyInitializeRejectsFeatureGateDisabled(t *testing.T) { _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=false") control := newBuiltMinReadyControl(t, newMinReadyDeployment()) - err := control.Initialize(releaseDemo.DeepCopy()) + err := control.Initialize(context.Background(), releaseDemo.DeepCopy()) if err == nil || !strings.Contains(err.Error(), "feature gate is disabled") { t.Fatalf("Initialize error = %v, want feature gate disabled", err) } @@ -166,7 +161,7 @@ func TestMinReadyInitializeAllowsCoveringPDB(t *testing.T) { } control := newBuiltMinReadyControl(t, deployment, pdb) - if err := control.Initialize(releaseDemo.DeepCopy()); err != nil { + if err := control.Initialize(context.Background(), releaseDemo.DeepCopy()); err != nil { t.Fatalf("Initialize failed: %v", err) } } @@ -175,7 +170,7 @@ func TestMinReadyUpgradeBatchUpdatesMaxUnavailableOnly(t *testing.T) { _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") deployment := newMinReadyDeployment() control := newBuiltMinReadyControl(t, deployment) - if err := control.Initialize(releaseDemo.DeepCopy()); err != nil { + if err := control.Initialize(context.Background(), releaseDemo.DeepCopy()); err != nil { t.Fatalf("Initialize failed: %v", err) } control.object = fetchMinReadyDeployment(t, control) @@ -185,7 +180,7 @@ func 
TestMinReadyUpgradeBatchUpdatesMaxUnavailableOnly(t *testing.T) { DesiredUpdatedReplicas: 5, } - if err := control.UpgradeBatch(ctx); err != nil { + if err := control.UpgradeBatch(context.Background(), ctx); err != nil { t.Fatalf("UpgradeBatch failed: %v", err) } @@ -193,9 +188,6 @@ func TestMinReadyUpgradeBatchUpdatesMaxUnavailableOnly(t *testing.T) { if unavailable := got.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.IntVal != 5 { t.Fatalf("maxUnavailable = %v, want 5", unavailable) } - if surge := got.Spec.Strategy.RollingUpdate.MaxSurge; surge == nil || surge.IntVal != InflatedMaxSurgeInt { - t.Fatalf("maxSurge = %v, want %d", surge, InflatedMaxSurgeInt) - } if got.Spec.Strategy.Type != apps.RollingUpdateDeploymentStrategyType { t.Fatalf("strategy.type = %q, want RollingUpdate", got.Spec.Strategy.Type) } @@ -213,7 +205,7 @@ func TestMinReadyUpgradeBatchRejectsStrategyTypeDrift(t *testing.T) { DesiredUpdatedReplicas: 5, } - err := control.UpgradeBatch(ctx) + err := control.UpgradeBatch(context.Background(), ctx) if err == nil || !strings.Contains(err.Error(), EventDegradedDriftDetected) { t.Fatalf("UpgradeBatch error = %v, want strategy type drift detected", err) } @@ -237,7 +229,7 @@ func TestMinReadyUpgradeBatchHealsPausedDrift(t *testing.T) { DesiredUpdatedReplicas: 5, } - if err := control.UpgradeBatch(ctx); err != nil { + if err := control.UpgradeBatch(context.Background(), ctx); err != nil { t.Fatalf("UpgradeBatch failed: %v", err) } @@ -264,7 +256,7 @@ func TestMinReadyUpgradeBatchRestoresInflatedStrategyFields(t *testing.T) { DesiredUpdatedReplicas: 5, } - if err := control.UpgradeBatch(ctx); err != nil { + if err := control.UpgradeBatch(context.Background(), ctx); err != nil { t.Fatalf("UpgradeBatch failed: %v", err) } @@ -300,7 +292,7 @@ func TestMinReadyUpgradeBatchConvergesMaxUnavailableOnScaleDown(t *testing.T) { DesiredUpdatedReplicas: 5, } - if err := control.UpgradeBatch(ctx); err != nil { + if err := 
control.UpgradeBatch(context.Background(), ctx); err != nil { t.Fatalf("UpgradeBatch failed: %v", err) } @@ -533,7 +525,7 @@ func TestMinReadyFinalizeRestoresAfterGateDisabled(t *testing.T) { addMinReadyOriginalAnnotations(deployment) control := newBuiltMinReadyControl(t, deployment) - if err := control.Finalize(releaseDemo.DeepCopy()); err != nil { + if err := control.Finalize(context.Background(), releaseDemo.DeepCopy()); err != nil { t.Fatalf("Finalize failed: %v", err) } diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_finalize_test.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_finalize_test.go index c846fbab..28101ef1 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_finalize_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_finalize_test.go @@ -17,6 +17,7 @@ limitations under the License. package deployment import ( + "context" "strings" "testing" @@ -30,7 +31,6 @@ func TestMinReadyFinalizeRestoresOriginalValues(t *testing.T) { AnnotationOriginalMinReadySeconds: "7", AnnotationOriginalProgressDeadlineSeconds: "60", AnnotationOriginalMaxUnavailable: "25%", - AnnotationOriginalMaxSurge: "1", util.BatchReleaseControlAnnotation: getControlInfo(releaseDemo), } deployment.Labels = map[string]string{ @@ -38,7 +38,7 @@ func TestMinReadyFinalizeRestoresOriginalValues(t *testing.T) { } control := newBuiltMinReadyControl(t, deployment) - if err := control.Finalize(releaseDemo.DeepCopy()); err != nil { + if err := control.Finalize(context.Background(), releaseDemo.DeepCopy()); err != nil { t.Fatalf("Finalize failed: %v", err) } @@ -52,9 +52,6 @@ func TestMinReadyFinalizeRestoresOriginalValues(t *testing.T) { if unavailable := got.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.StrVal != "25%" { t.Fatalf("maxUnavailable = %v, want 25%%", unavailable) } - if surge := got.Spec.Strategy.RollingUpdate.MaxSurge; 
surge == nil || surge.IntVal != 1 { - t.Fatalf("maxSurge = %v, want 1", surge) - } for _, key := range AllOriginalAnnotations { if _, ok := got.Annotations[key]; ok { t.Fatalf("annotation %s still exists", key) @@ -72,13 +69,12 @@ func TestMinReadyFinalizeRestoresKubernetesDefaults(t *testing.T) { deployment := newInflatedMinReadyDeployment() deployment.Annotations = map[string]string{ AnnotationOriginalMinReadySeconds: "0", - AnnotationOriginalProgressDeadlineSeconds: AnnotationValueKubernetesDefault, - AnnotationOriginalMaxUnavailable: AnnotationValueKubernetesDefault, - AnnotationOriginalMaxSurge: AnnotationValueKubernetesDefault, + AnnotationOriginalProgressDeadlineSeconds: "600", + AnnotationOriginalMaxUnavailable: "25%", } control := newBuiltMinReadyControl(t, deployment) - if err := control.Finalize(releaseDemo.DeepCopy()); err != nil { + if err := control.Finalize(context.Background(), releaseDemo.DeepCopy()); err != nil { t.Fatalf("Finalize failed: %v", err) } @@ -86,11 +82,17 @@ func TestMinReadyFinalizeRestoresKubernetesDefaults(t *testing.T) { if got.Spec.MinReadySeconds != 0 { t.Fatalf("minReadySeconds = %d, want 0", got.Spec.MinReadySeconds) } - if got.Spec.ProgressDeadlineSeconds != nil { - t.Fatalf("progressDeadlineSeconds = %v, want nil", got.Spec.ProgressDeadlineSeconds) + if got.Spec.ProgressDeadlineSeconds == nil || *got.Spec.ProgressDeadlineSeconds != DefaultProgressDeadlineSeconds { + t.Fatalf("progressDeadlineSeconds = %v, want %d", got.Spec.ProgressDeadlineSeconds, DefaultProgressDeadlineSeconds) } - if got.Spec.Strategy.RollingUpdate != nil { - t.Fatalf("rollingUpdate = %v, want nil", got.Spec.Strategy.RollingUpdate) + if got.Spec.Strategy.RollingUpdate == nil { + t.Fatalf("rollingUpdate = nil, want maxSurge preserved") + } + if unavailable := got.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.StrVal != DefaultMaxUnavailable { + t.Fatalf("maxUnavailable = %v, want %s", unavailable, DefaultMaxUnavailable) + } + 
if surge := got.Spec.Strategy.RollingUpdate.MaxSurge; surge == nil || surge.IntVal != 1 { + t.Fatalf("maxSurge = %v, want original value preserved", surge) } } @@ -99,7 +101,7 @@ func TestMinReadyFinalizeNoopWhenAnnotationsAbsentAndFieldsRestored(t *testing.T deployment.Annotations = nil control := newBuiltMinReadyControl(t, deployment) - if err := control.Finalize(releaseDemo.DeepCopy()); err != nil { + if err := control.Finalize(context.Background(), releaseDemo.DeepCopy()); err != nil { t.Fatalf("Finalize failed: %v", err) } @@ -114,7 +116,7 @@ func TestMinReadyFinalizeRejectsMissingAnnotationsWhileFieldsInflated(t *testing deployment.Annotations = nil control := newBuiltMinReadyControl(t, deployment) - err := control.Finalize(releaseDemo.DeepCopy()) + err := control.Finalize(context.Background(), releaseDemo.DeepCopy()) if err == nil || !strings.Contains(err.Error(), "annotation state missing") { t.Fatalf("Finalize error = %v, want missing annotation state error", err) } @@ -130,7 +132,7 @@ func TestMinReadyFinalizeRejectsPartialAnnotations(t *testing.T) { } control := newBuiltMinReadyControl(t, deployment) - err := control.Finalize(releaseDemo.DeepCopy()) + err := control.Finalize(context.Background(), releaseDemo.DeepCopy()) if err == nil || !strings.Contains(err.Error(), AnnotationOriginalProgressDeadlineSeconds) { t.Fatalf("Finalize error = %v, want missing annotation error", err) } @@ -144,11 +146,10 @@ func TestMinReadyFinalizeRejectsMalformedAnnotations(t *testing.T) { AnnotationOriginalMinReadySeconds: "7", AnnotationOriginalProgressDeadlineSeconds: "bad", AnnotationOriginalMaxUnavailable: "25%", - AnnotationOriginalMaxSurge: "1", } control := newBuiltMinReadyControl(t, deployment) - err := control.Finalize(releaseDemo.DeepCopy()) + err := control.Finalize(context.Background(), releaseDemo.DeepCopy()) if err == nil || !strings.Contains(err.Error(), "malformed int32") { t.Fatalf("Finalize error = %v, want malformed int32 error", err) } diff --git 
a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_test_helpers_test.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_test_helpers_test.go index 4d20601c..ca3b01b5 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_test_helpers_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_test_helpers_test.go @@ -109,9 +109,6 @@ func assertMinReadyInflated(t *testing.T, deployment *apps.Deployment) { if got := deployment.Spec.Strategy.RollingUpdate.MaxUnavailable; got == nil || got.IntVal != 0 { t.Fatalf("maxUnavailable = %v, want 0", got) } - if got := deployment.Spec.Strategy.RollingUpdate.MaxSurge; got == nil || got.IntVal != InflatedMaxSurgeInt { - t.Fatalf("maxSurge = %v, want %d", got, InflatedMaxSurgeInt) - } } func assertMinReadyInflatedWithoutSurgeRequirement(t *testing.T, deployment *apps.Deployment) { @@ -137,7 +134,6 @@ func addMinReadyOriginalAnnotations(deployment *apps.Deployment) { deployment.Annotations[AnnotationOriginalMinReadySeconds] = "7" deployment.Annotations[AnnotationOriginalProgressDeadlineSeconds] = "60" deployment.Annotations[AnnotationOriginalMaxUnavailable] = "25%" - deployment.Annotations[AnnotationOriginalMaxSurge] = "1" } func appendPodObjects(objects []interface{}, pods []*corev1.Pod) []interface{} { diff --git a/pkg/controller/batchrelease/control/partitionstyle/interface.go b/pkg/controller/batchrelease/control/partitionstyle/interface.go index 90117785..d15095bd 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/interface.go +++ b/pkg/controller/batchrelease/control/partitionstyle/interface.go @@ -17,6 +17,8 @@ limitations under the License. 
package partitionstyle import ( + "context" + corev1 "k8s.io/api/core/v1" "github.com/openkruise/rollouts/api/v1beta1" @@ -40,11 +42,11 @@ type Interface interface { // Initialize do something before rolling out, for example: // - claim the workload is under our control; // - other things related with specific type of workload, such as 100% partition settings. - Initialize(release *v1beta1.BatchRelease) error + Initialize(ctx context.Context, release *v1beta1.BatchRelease) error // UpgradeBatch upgrade workload according current batch context. - UpgradeBatch(ctx *batchcontext.BatchContext) error + UpgradeBatch(ctx context.Context, batchContext *batchcontext.BatchContext) error // Finalize do something after rolling out, for example: // - free the stable workload from rollout control; // - resume workload if we need. - Finalize(release *v1beta1.BatchRelease) error + Finalize(ctx context.Context, release *v1beta1.BatchRelease) error } diff --git a/pkg/controller/batchrelease/control/partitionstyle/minready_status.go b/pkg/controller/batchrelease/control/partitionstyle/minready_status.go index 8261a1f9..1a71812c 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/minready_status.go +++ b/pkg/controller/batchrelease/control/partitionstyle/minready_status.go @@ -18,47 +18,36 @@ package partitionstyle import ( "errors" - "reflect" "time" - apps "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" + "k8s.io/klog/v2" "github.com/openkruise/rollouts/api/v1beta1" brmetrics "github.com/openkruise/rollouts/pkg/controller/batchrelease/metrics" - "github.com/openkruise/rollouts/pkg/feature" "github.com/openkruise/rollouts/pkg/util" - utilfeature "github.com/openkruise/rollouts/pkg/util/feature" ) -func (rc *realBatchControlPlane) isMinReadyRelease() bool { - if rc.release == nil { - return false - } - targetRef := rc.release.Spec.WorkloadRef - isDeploymentPartition := targetRef.APIVersion == apps.SchemeGroupVersion.String() && - targetRef.Kind == 
reflect.TypeOf(apps.Deployment{}).Name() && - rc.release.Spec.ReleasePlan.RollingStyle == v1beta1.PartitionRollingStyle - if !isDeploymentPartition { - return false - } - if utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) { - return true +type minReadyControllerMarker interface { + IsMinReadyControl() bool +} + +func isMinReadyController(controller Interface) bool { + marker, ok := controller.(minReadyControllerMarker) + return ok && marker.IsMinReadyControl() +} + +func (rc *realBatchControlPlane) recordMinReadyDegradedOrLog(minReady bool, reason string, err error) { + if minReady { + rc.recordMinReadyDegraded(reason, err) + return } - // Gate disabled mid-rollout: a Deployment still carrying MinReady original - // annotations is under MinReady control until finalized. Keep recording its - // status so degraded conditions are not silently suppressed. Falls back to - // false before the controller is built (no workload info yet). - if info := rc.GetWorkloadInfo(); info != nil { - return v1beta1.HasMinReadyOriginalAnnotations(info.Annotations) + if err != nil { + klog.ErrorS(err, "Partition-style control plane failed", "release", klog.KObj(rc.release), "reason", reason) } - return false } func (rc *realBatchControlPlane) recordMinReadyNormal(condType v1beta1.RolloutConditionType, reason, message string) { - if !rc.isMinReadyRelease() { - return - } previousCondition := util.GetBatchReleaseCondition(*rc.newStatus, condType) condition := util.NewRolloutCondition(condType, v1.ConditionTrue, reason, message) util.SetBatchReleaseCondition(rc.newStatus, *condition) @@ -88,7 +77,7 @@ func observeMinReadyBatchDuration(release *v1beta1.BatchRelease, condition *v1be } func (rc *realBatchControlPlane) recordMinReadyDegraded(reason string, err error) { - if !rc.isMinReadyRelease() || err == nil { + if err == nil { return } message := err.Error() diff --git a/pkg/controller/batchrelease/control/partitionstyle/nativedaemonset/control.go 
b/pkg/controller/batchrelease/control/partitionstyle/nativedaemonset/control.go index 9cc890fe..9c25a336 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/nativedaemonset/control.go +++ b/pkg/controller/batchrelease/control/partitionstyle/nativedaemonset/control.go @@ -94,7 +94,7 @@ func (rc *realController) ListOwnedPods() ([]*corev1.Pod, error) { } // Initialize prepares the native DaemonSet for batch release by setting the appropriate update strategy. -func (rc *realController) Initialize(release *v1beta1.BatchRelease) error { +func (rc *realController) Initialize(ctx context.Context, release *v1beta1.BatchRelease) error { if control.IsControlledByBatchRelease(release, rc.object) { return nil } @@ -127,21 +127,21 @@ func (rc *realController) Initialize(release *v1beta1.BatchRelease) error { return fmt.Errorf("failed to marshal patch: %v", err) } - return rc.client.Patch(context.TODO(), daemon, client.RawPatch(types.MergePatchType, patchBytes)) + return rc.client.Patch(ctx, daemon, client.RawPatch(types.MergePatchType, patchBytes)) } // UpgradeBatch handles the batch upgrade for native DaemonSet by managing annotations. // The actual pod deletion is handled by the advanced-daemonset-controller. 
-func (rc *realController) UpgradeBatch(ctx *batchcontext.BatchContext) error { +func (rc *realController) UpgradeBatch(ctx context.Context, batchContext *batchcontext.BatchContext) error { // Check if the DaemonSet already has the partition annotation currentPartitionStr, _ := util.ParseDaemonSetAdvancedControl(rc.object.Annotations) - desiredPartitionStr := ctx.DesiredPartition.String() + desiredPartitionStr := batchContext.DesiredPartition.String() // If annotation is missing or doesn't equal desired value, patch the DaemonSet if currentPartitionStr != desiredPartitionStr { klog.Infof("Updating partition annotation for DaemonSet %s/%s: %s -> %s", rc.object.Namespace, rc.object.Name, currentPartitionStr, desiredPartitionStr) - return rc.patchBatchAnnotations(ctx) + return rc.patchBatchAnnotations(ctx, batchContext) } // Partition annotation already matches desired value, no action needed @@ -151,10 +151,10 @@ func (rc *realController) UpgradeBatch(ctx *batchcontext.BatchContext) error { } // patchBatchAnnotations patches the DaemonSet with batch control annotations -func (rc *realController) patchBatchAnnotations(ctx *batchcontext.BatchContext) error { +func (rc *realController) patchBatchAnnotations(ctx context.Context, batchContext *batchcontext.BatchContext) error { // Use SetDaemonSetAdvancedControl to set annotations annotations := make(map[string]string) - util.SetDaemonSetAdvancedControl(annotations, ctx.DesiredPartition.String(), ctx.UpdateRevision) + util.SetDaemonSetAdvancedControl(annotations, batchContext.DesiredPartition.String(), batchContext.UpdateRevision) // Create patch with batch annotations patch := map[string]interface{}{ @@ -169,11 +169,11 @@ func (rc *realController) patchBatchAnnotations(ctx *batchcontext.BatchContext) } daemon := util.GetEmptyObjectWithKey(rc.object) - return rc.client.Patch(context.TODO(), daemon, client.RawPatch(types.MergePatchType, patchBytes)) + return rc.client.Patch(ctx, daemon, 
client.RawPatch(types.MergePatchType, patchBytes)) } // Finalize cleans up the annotations and restores the original update strategy. -func (rc *realController) Finalize(release *v1beta1.BatchRelease) error { +func (rc *realController) Finalize(ctx context.Context, release *v1beta1.BatchRelease) error { if rc.object == nil { return nil } @@ -205,7 +205,7 @@ func (rc *realController) Finalize(release *v1beta1.BatchRelease) error { daemon := util.GetEmptyObjectWithKey(rc.object) - return rc.client.Patch(context.TODO(), daemon, client.RawPatch(types.MergePatchType, []byte(body))) + return rc.client.Patch(ctx, daemon, client.RawPatch(types.MergePatchType, []byte(body))) } // CalculateBatchContext calculates the batch context for native DaemonSet. diff --git a/pkg/controller/batchrelease/control/partitionstyle/nativedaemonset/control_test.go b/pkg/controller/batchrelease/control/partitionstyle/nativedaemonset/control_test.go index c4b3bdc5..5fe15a08 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/nativedaemonset/control_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/nativedaemonset/control_test.go @@ -433,7 +433,7 @@ func TestInitialize(t *testing.T) { controller := NewController(cli, key, gvk) builtController, _ := controller.BuildController() - err := builtController.Initialize(batchReleaseDemo) + err := builtController.Initialize(context.Background(), batchReleaseDemo) assert.NoError(t, err) // Verify the DaemonSet was updated correctly @@ -467,7 +467,7 @@ func TestInitializeAlreadyControlled(t *testing.T) { controller := NewController(cli, key, gvk) builtController, _ := controller.BuildController() - err := builtController.Initialize(batchReleaseDemo) + err := builtController.Initialize(context.Background(), batchReleaseDemo) assert.NoError(t, err) // Verify the DaemonSet was not changed @@ -497,7 +497,7 @@ func TestInitializeWithRollingUpdateStrategy(t *testing.T) { controller := NewController(cli, key, gvk) builtController, _ := 
controller.BuildController() - err := builtController.Initialize(batchReleaseDemo) + err := builtController.Initialize(context.Background(), batchReleaseDemo) assert.NoError(t, err) // Verify the DaemonSet was updated correctly @@ -540,7 +540,7 @@ func TestInitialize_PatchError(t *testing.T) { rc.object = daemon rc.WorkloadInfo = util.ParseWorkload(daemon) - err := rc.Initialize(batchReleaseDemo) + err := rc.Initialize(context.Background(), batchReleaseDemo) assert.Error(t, err) // Should fail because daemon doesn't exist in client } @@ -568,7 +568,7 @@ func TestUpgradeBatchFirstTime(t *testing.T) { Replicas: 5, } - err := builtController.UpgradeBatch(ctx) + err := builtController.UpgradeBatch(context.Background(), ctx) assert.NoError(t, err) // Verify the DaemonSet has the batch annotations with JSON format @@ -611,7 +611,7 @@ func TestUpgradeBatchSamePartition(t *testing.T) { Replicas: 5, } - err := builtController.UpgradeBatch(ctx) + err := builtController.UpgradeBatch(context.Background(), ctx) assert.NoError(t, err) // Verify the DaemonSet annotations remain unchanged (no additional patch call) @@ -651,7 +651,7 @@ func TestUpgradeBatchDifferentPartition(t *testing.T) { Replicas: 5, } - err := builtController.UpgradeBatch(ctx) + err := builtController.UpgradeBatch(context.Background(), ctx) assert.NoError(t, err) // Verify the DaemonSet annotations are updated to the new partition @@ -693,7 +693,7 @@ func TestPatchBatchAnnotations_PatchError(t *testing.T) { UpdateRevision: "update-revision-123", } - err := rc.patchBatchAnnotations(ctx) + err := rc.patchBatchAnnotations(context.Background(), ctx) assert.Error(t, err) // Should fail because daemon doesn't exist in client } @@ -719,7 +719,7 @@ func TestFinalizeWithBatchPartitionNil(t *testing.T) { completedBatchRelease := batchReleaseDemo.DeepCopy() completedBatchRelease.Spec.ReleasePlan.BatchPartition = nil - err := builtController.Finalize(completedBatchRelease) + err := 
builtController.Finalize(context.Background(), completedBatchRelease) assert.NoError(t, err) // Verify the DaemonSet was updated correctly @@ -762,7 +762,7 @@ func TestFinalizeWithOriginalRollingUpdateStrategy(t *testing.T) { completedBatchRelease := batchReleaseDemo.DeepCopy() completedBatchRelease.Spec.ReleasePlan.BatchPartition = nil - err := builtController.Finalize(completedBatchRelease) + err := builtController.Finalize(context.Background(), completedBatchRelease) assert.NoError(t, err) // Verify the DaemonSet was updated correctly @@ -807,7 +807,7 @@ func TestFinalizeWithOriginalOnDeleteStrategy(t *testing.T) { completedBatchRelease := batchReleaseDemo.DeepCopy() completedBatchRelease.Spec.ReleasePlan.BatchPartition = nil - err := builtController.Finalize(completedBatchRelease) + err := builtController.Finalize(context.Background(), completedBatchRelease) assert.NoError(t, err) // Verify the DaemonSet was updated correctly @@ -852,7 +852,7 @@ func TestFinalizeWithMissingOriginalStrategy(t *testing.T) { completedBatchRelease := batchReleaseDemo.DeepCopy() completedBatchRelease.Spec.ReleasePlan.BatchPartition = nil - err := builtController.Finalize(completedBatchRelease) + err := builtController.Finalize(context.Background(), completedBatchRelease) assert.NoError(t, err) // Verify the DaemonSet was updated correctly @@ -896,7 +896,7 @@ func TestFinalizeWithBatchPartitionNotNil(t *testing.T) { batchPartition := int32(1) inProgressBatchRelease.Spec.ReleasePlan.BatchPartition = &batchPartition - err := builtController.Finalize(inProgressBatchRelease) + err := builtController.Finalize(context.Background(), inProgressBatchRelease) assert.NoError(t, err) // Verify the DaemonSet was updated correctly @@ -929,7 +929,7 @@ func TestFinalize_NilObject(t *testing.T) { rc := controller.(*realController) rc.object = nil // Set object to nil - err := rc.Finalize(batchReleaseDemo) + err := rc.Finalize(context.Background(), batchReleaseDemo) assert.NoError(t, err) // Should 
return without error } @@ -953,7 +953,7 @@ func TestFinalize_PatchError(t *testing.T) { rc.object = daemon rc.WorkloadInfo = util.ParseWorkload(daemon) - err := rc.Finalize(batchReleaseDemo) + err := rc.Finalize(context.Background(), batchReleaseDemo) assert.Error(t, err) // Should fail because daemon doesn't exist in client } diff --git a/pkg/controller/batchrelease/control/partitionstyle/statefulset/control.go b/pkg/controller/batchrelease/control/partitionstyle/statefulset/control.go index 405dd36a..33241582 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/statefulset/control.go +++ b/pkg/controller/batchrelease/control/partitionstyle/statefulset/control.go @@ -100,7 +100,7 @@ func (rc *realController) ListOwnedPods() ([]*corev1.Pod, error) { return rc.pods, err } -func (rc *realController) Initialize(release *v1beta1.BatchRelease) error { +func (rc *realController) Initialize(ctx context.Context, release *v1beta1.BatchRelease) error { if control.IsControlledByBatchRelease(release, rc.object) { return nil } @@ -111,12 +111,12 @@ func (rc *realController) Initialize(release *v1beta1.BatchRelease) error { body := fmt.Sprintf(`{%s,%s}`, metaBody, specBody) clone := util.GetEmptyObjectWithKey(rc.object) - return rc.client.Patch(context.TODO(), clone, client.RawPatch(types.MergePatchType, []byte(body))) + return rc.client.Patch(ctx, clone, client.RawPatch(types.MergePatchType, []byte(body))) } -func (rc *realController) UpgradeBatch(ctx *batchcontext.BatchContext) error { - desired := ctx.DesiredPartition.IntVal - current := ctx.CurrentPartition.IntVal +func (rc *realController) UpgradeBatch(ctx context.Context, batchContext *batchcontext.BatchContext) error { + desired := batchContext.DesiredPartition.IntVal + current := batchContext.CurrentPartition.IntVal // current less than desired, which means current revision replicas will be less than desired, // in other word, update revision replicas will be more than desired, no need to update again. 
if current <= desired { @@ -126,10 +126,10 @@ func (rc *realController) UpgradeBatch(ctx *batchcontext.BatchContext) error { body := fmt.Sprintf(`{"spec":{"updateStrategy":{"rollingUpdate":{"partition":%d}}}}`, desired) clone := rc.object.DeepCopyObject().(client.Object) - return rc.client.Patch(context.TODO(), clone, client.RawPatch(types.MergePatchType, []byte(body))) + return rc.client.Patch(ctx, clone, client.RawPatch(types.MergePatchType, []byte(body))) } -func (rc *realController) Finalize(release *v1beta1.BatchRelease) error { +func (rc *realController) Finalize(ctx context.Context, release *v1beta1.BatchRelease) error { if rc.object == nil { return nil } @@ -144,7 +144,7 @@ func (rc *realController) Finalize(release *v1beta1.BatchRelease) error { } body := fmt.Sprintf(`{"metadata":{"annotations":{"%s":null}}%s}`, util.BatchReleaseControlAnnotation, specBody) clone := util.GetEmptyObjectWithKey(rc.object) - return rc.client.Patch(context.TODO(), clone, client.RawPatch(types.MergePatchType, []byte(body))) + return rc.client.Patch(ctx, clone, client.RawPatch(types.MergePatchType, []byte(body))) } func (rc *realController) CalculateBatchContext(release *v1beta1.BatchRelease) (*batchcontext.BatchContext, error) { diff --git a/pkg/controller/batchrelease/control/partitionstyle/statefulset/control_test.go b/pkg/controller/batchrelease/control/partitionstyle/statefulset/control_test.go index b78730e9..20ca7ee3 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/statefulset/control_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/statefulset/control_test.go @@ -580,7 +580,7 @@ func TestRealController(t *testing.T) { controller, err := c.BuildController() Expect(err).NotTo(HaveOccurred()) - err = controller.Initialize(release) + err = controller.Initialize(context.Background(), release) Expect(err).NotTo(HaveOccurred()) fetch := &kruiseappsv1beta1.StatefulSet{} Expect(cli.Get(context.TODO(), stsKey, fetch)).NotTo(HaveOccurred()) @@ -591,7 
+591,7 @@ func TestRealController(t *testing.T) { for { batchContext, err := controller.CalculateBatchContext(release) Expect(err).NotTo(HaveOccurred()) - err = controller.UpgradeBatch(batchContext) + err = controller.UpgradeBatch(context.Background(), batchContext) // mock fetch = &kruiseappsv1beta1.StatefulSet{} Expect(cli.Get(context.TODO(), stsKey, fetch)).NotTo(HaveOccurred()) @@ -605,11 +605,11 @@ func TestRealController(t *testing.T) { Expect(*fetch.Spec.UpdateStrategy.RollingUpdate.Partition).Should(BeNumerically("==", 9)) // mock - _ = controller.Finalize(release) + _ = controller.Finalize(context.Background(), release) fetch = &kruiseappsv1beta1.StatefulSet{} Expect(cli.Get(context.TODO(), stsKey, fetch)).NotTo(HaveOccurred()) c.object = fetch - err = controller.Finalize(release) + err = controller.Finalize(context.Background(), release) Expect(err).NotTo(HaveOccurred()) fetch = &kruiseappsv1beta1.StatefulSet{} Expect(cli.Get(context.TODO(), stsKey, fetch)).NotTo(HaveOccurred()) @@ -737,7 +737,7 @@ func TestFinalize(t *testing.T) { t.Fatalf("BuildController failed: %s", err.Error()) } cs.featureGateFunc() - err = c.Finalize(br) + err = c.Finalize(context.Background(), br) if err != nil { t.Fatalf("BuildController failed: %s", err.Error()) } diff --git a/pkg/webhook/workload/mutating/workload_update_handler.go b/pkg/webhook/workload/mutating/workload_update_handler.go index 7564cef8..c58a701f 100644 --- a/pkg/webhook/workload/mutating/workload_update_handler.go +++ b/pkg/webhook/workload/mutating/workload_update_handler.go @@ -241,7 +241,7 @@ func (h *WorkloadHandler) handleDeployment(newObj, oldObj *apps.Deployment) (boo // in rollout progressing if newObj.Annotations[util.InRolloutProgressingAnnotation] != "" { modified := false - if shouldSkipRecreateMutationForMinReady(rollout) { + if isMinReadySecondsStrategy(rollout, newObj) { return enforceMinReadyInflation(newObj), nil } strategy := util.GetDeploymentStrategy(newObj) @@ -330,7 +330,7 @@ func (h 
*WorkloadHandler) handleDeployment(newObj, oldObj *apps.Deployment) (boo newObj.Labels[appsv1alpha1.DeploymentStableRevisionLabel] = stableRS.Labels[apps.DefaultDeploymentUniqueLabelKey] } - if shouldSkipRecreateMutationForMinReady(rollout) { + if isMinReadySecondsStrategy(rollout, newObj) { // MinReady keeps the native controller running, so it must NOT be paused. // Inflate the strategy synchronously at admission time: this snapshots the // original fields into annotations and sets minReadySeconds/maxUnavailable @@ -471,20 +471,25 @@ func isEffectiveDeploymentRevisionChange(oldObj, newObj *apps.Deployment) bool { return true } -// shouldSkipRecreateMutationForMinReady reports whether the Deployment should be -// driven by the MinReadySeconds strategy (keep RollingUpdate, do not pause) -// instead of the legacy Recreate-style mutation. +// isMinReadySecondsStrategy reports whether the Deployment should be driven by +// the MinReadySeconds strategy (keep RollingUpdate, do not pause) instead of +// the legacy Recreate-style mutation. // // It only checks Canary because a Rollout cannot declare BlueGreen and Canary at // the same time: the validating webhook rejects that combination // (pkg/webhook/rollout/validating/rollout_create_update_handler.go, -// "Canary and BlueGreen cannot both be set"). With BlueGreen==nil guaranteed, -// Canary!=nil && !EnableExtraWorkloadForCanary is equivalent to the executor's -// GetRollingStyle()==Partition routing, so both sides agree on MinReady. -func shouldSkipRecreateMutationForMinReady(rollout *appsv1beta1.Rollout) bool { - return rollout.Spec.Strategy.Canary != nil && - !rollout.Spec.Strategy.Canary.EnableExtraWorkloadForCanary && - utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) +// "Canary and BlueGreen cannot both be set"). When the feature gate is disabled +// mid-rollout, the DeploymentStrategyAnnotation keeps this symmetric with the +// executor's MinReady annotation fallback. 
+func isMinReadySecondsStrategy(rollout *appsv1beta1.Rollout, deployment *apps.Deployment) bool { + if rollout.Spec.Strategy.Canary == nil || rollout.Spec.Strategy.Canary.EnableExtraWorkloadForCanary { + return false + } + if utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) { + return true + } + strategy := util.GetDeploymentStrategy(deployment) + return strings.EqualFold(string(strategy.RollingStyle), string(appsv1alpha1.PartitionRollingStyle)) } func enforceMinReadyInflation(deployment *apps.Deployment) bool { diff --git a/pkg/webhook/workload/mutating/workload_update_handler_test.go b/pkg/webhook/workload/mutating/workload_update_handler_test.go index f38f4a29..ee2e3cc4 100644 --- a/pkg/webhook/workload/mutating/workload_update_handler_test.go +++ b/pkg/webhook/workload/mutating/workload_update_handler_test.go @@ -441,9 +441,8 @@ func TestHandlerDeployment(t *testing.T) { // minReadySeconds/progressDeadline/maxUnavailable synchronously so the // native controller never observes the original budget before Initialize. 
obj.Annotations[partitiondeployment.AnnotationOriginalMinReadySeconds] = "0" - obj.Annotations[partitiondeployment.AnnotationOriginalProgressDeadlineSeconds] = partitiondeployment.AnnotationValueKubernetesDefault - obj.Annotations[partitiondeployment.AnnotationOriginalMaxUnavailable] = partitiondeployment.AnnotationValueKubernetesDefault - obj.Annotations[partitiondeployment.AnnotationOriginalMaxSurge] = partitiondeployment.AnnotationValueKubernetesDefault + obj.Annotations[partitiondeployment.AnnotationOriginalProgressDeadlineSeconds] = "600" + obj.Annotations[partitiondeployment.AnnotationOriginalMaxUnavailable] = "25%" obj.Spec.Paused = false obj.Spec.MinReadySeconds = partitiondeployment.InflatedMinReadySeconds pds := partitiondeployment.InflatedProgressDeadlineSeconds @@ -885,18 +884,27 @@ func TestHandlerDeployment(t *testing.T) { } } -func TestShouldSkipRecreateMutationForMinReady(t *testing.T) { +func TestIsMinReadySecondsStrategy(t *testing.T) { rollout := rolloutDemo.DeepCopy() + deployment := deploymentDemo.DeepCopy() + if deployment.Annotations == nil { + deployment.Annotations = map[string]string{} + } _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=false") - if shouldSkipRecreateMutationForMinReady(rollout) { + if isMinReadySecondsStrategy(rollout, deployment) { t.Fatalf("skip returned true while feature gate is disabled") } + deployment.Annotations[appsv1alpha1.DeploymentStrategyAnnotation] = `{"rollingStyle":"Partition"}` + if !isMinReadySecondsStrategy(rollout, deployment) { + t.Fatalf("skip returned false for in-progress MinReady Deployment with strategy annotation") + } + delete(deployment.Annotations, appsv1alpha1.DeploymentStrategyAnnotation) _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") - if !shouldSkipRecreateMutationForMinReady(rollout) { + if !isMinReadySecondsStrategy(rollout, deployment) { t.Fatalf("skip returned false for MinReadySeconds with 
feature gate enabled") } rollout.Spec.Strategy.Canary.EnableExtraWorkloadForCanary = true - if shouldSkipRecreateMutationForMinReady(rollout) { + if isMinReadySecondsStrategy(rollout, deployment) { t.Fatalf("skip returned true for canary-style rollout") } } @@ -910,9 +918,8 @@ func inflatedMinReadyDeployment() *apps.Deployment { ObjectMeta: metav1.ObjectMeta{ Annotations: map[string]string{ partitiondeployment.AnnotationOriginalMinReadySeconds: "0", - partitiondeployment.AnnotationOriginalProgressDeadlineSeconds: partitiondeployment.AnnotationValueKubernetesDefault, - partitiondeployment.AnnotationOriginalMaxUnavailable: partitiondeployment.AnnotationValueKubernetesDefault, - partitiondeployment.AnnotationOriginalMaxSurge: partitiondeployment.AnnotationValueKubernetesDefault, + partitiondeployment.AnnotationOriginalProgressDeadlineSeconds: "600", + partitiondeployment.AnnotationOriginalMaxUnavailable: "25%", }, }, Spec: apps.DeploymentSpec{ diff --git a/test/e2e/deployment_minready_test.go b/test/e2e/deployment_minready_test.go index 3aa20a32..efd6ce39 100644 --- a/test/e2e/deployment_minready_test.go +++ b/test/e2e/deployment_minready_test.go @@ -122,13 +122,13 @@ var _ = SIGDescribe("Deployment MinReadySeconds", func() { It("TC7 missing annotation blocks finalize until the operator restores it", func() { rollout := startMinReadyE2ERollout(namespace) - deleteMinReadyE2EOriginalAnnotation(namespace, partitiondeployment.AnnotationOriginalMaxSurge) + deleteMinReadyE2EOriginalAnnotation(namespace, partitiondeployment.AnnotationOriginalMaxUnavailable) resumeMinReadyE2ERollout(namespace, rollout.Name) resumeMinReadyE2ERollout(namespace, rollout.Name) waitMinReadyE2EBatchCondition(namespace, rollout.Name, "MinReadyDegradedMissingAnnotations") waitMinReadyE2EEventReason(namespace, "MinReadyDegradedMissingAnnotations") - restoreMinReadyE2EOriginalAnnotation(namespace, partitiondeployment.AnnotationOriginalMaxSurge, "1") + restoreMinReadyE2EOriginalAnnotation(namespace, 
partitiondeployment.AnnotationOriginalMaxUnavailable, "25%") waitMinReadyE2ERolloutPhase(namespace, rollout.Name, v1beta1.RolloutPhaseHealthy) waitMinReadyE2EDeploymentRestored(namespace) }) diff --git a/test/integration/concurrency_test.go b/test/integration/concurrency_test.go index 63ea00d9..833a4ded 100644 --- a/test/integration/concurrency_test.go +++ b/test/integration/concurrency_test.go @@ -29,6 +29,7 @@ import ( "github.com/openkruise/rollouts/api/v1beta1" partitiondeployment "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/deployment" "github.com/openkruise/rollouts/pkg/feature" + "github.com/openkruise/rollouts/pkg/util" utilfeature "github.com/openkruise/rollouts/pkg/util/feature" ) @@ -57,7 +58,7 @@ func TestDeploymentMinReadyConcurrentScaleUsesLatestReplicas(t *testing.T) { assertIntegrationCondition(t, status, v1beta1.RolloutConditionMinReadyBatching, corev1.ConditionTrue, "MinReadyBatching") } -func TestDeploymentMinReadyConcurrentGitOpsDriftIsDegraded(t *testing.T) { +func TestDeploymentMinReadyConcurrentMaxUnavailableAboveTargetSelfHeals(t *testing.T) { _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") release := newIntegrationMinReadyRelease() release.Status.CanaryStatus.CurrentBatch = 1 @@ -73,30 +74,31 @@ func TestDeploymentMinReadyConcurrentGitOpsDriftIsDegraded(t *testing.T) { control := newIntegrationMinReadyControl(cli, recorder, release, status, deployment.Name) err := control.UpgradeBatch() - if err == nil || !strings.Contains(err.Error(), partitiondeployment.EventDegradedDriftDetected) { - t.Fatalf("UpgradeBatch error = %v, want drift detected", err) + if err != nil { + t.Fatalf("UpgradeBatch failed: %v", err) } got := fetchIntegrationDeployment(t, cli, deployment) - if unavailable := got.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.IntVal != 6 { - t.Fatalf("maxUnavailable = %v, want drifted value preserved", unavailable) + 
if unavailable := got.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.IntVal != 5 { + t.Fatalf("maxUnavailable = %v, want target value 5", unavailable) + } + if degraded := util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyDegraded); degraded != nil { + t.Fatalf("degraded condition = %v, want nil", degraded) } - assertIntegrationCondition(t, status, v1beta1.RolloutConditionMinReadyDegraded, corev1.ConditionTrue, "MinReadyDegradedDriftDetected") - assertIntegrationEvent(t, recorder, "MinReadyDegradedDriftDetected") } func TestDeploymentMinReadyConcurrentAnnotationDeletionBlocksFinalize(t *testing.T) { _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") release := newIntegrationMinReadyRelease() deployment := newInflatedIntegrationDeployment() - delete(deployment.Annotations, partitiondeployment.AnnotationOriginalMaxSurge) + delete(deployment.Annotations, partitiondeployment.AnnotationOriginalMaxUnavailable) recorder := record.NewFakeRecorder(20) cli := newIntegrationClient(release, deployment) status := release.Status.DeepCopy() control := newIntegrationMinReadyControl(cli, recorder, release, status, deployment.Name) err := control.Finalize() - if err == nil || !strings.Contains(err.Error(), partitiondeployment.AnnotationOriginalMaxSurge) { + if err == nil || !strings.Contains(err.Error(), partitiondeployment.AnnotationOriginalMaxUnavailable) { t.Fatalf("Finalize error = %v, want missing annotation", err) } diff --git a/test/integration/deployment_minready_test.go b/test/integration/deployment_minready_test.go index 99016971..a0f6c4c6 100644 --- a/test/integration/deployment_minready_test.go +++ b/test/integration/deployment_minready_test.go @@ -50,7 +50,6 @@ func TestDeploymentMinReadyControlPlaneInitialize(t *testing.T) { assertOriginalAnnotation(t, got, partitiondeployment.AnnotationOriginalMinReadySeconds, "5") assertOriginalAnnotation(t, got, 
partitiondeployment.AnnotationOriginalProgressDeadlineSeconds, "60") assertOriginalAnnotation(t, got, partitiondeployment.AnnotationOriginalMaxUnavailable, "25%") - assertOriginalAnnotation(t, got, partitiondeployment.AnnotationOriginalMaxSurge, "1") assertIntegrationCondition(t, status, v1beta1.RolloutConditionMinReadyInitialized, corev1.ConditionTrue, "MinReadyInitialized") assertIntegrationEvent(t, recorder, "MinReadyInitialized") } diff --git a/test/integration/minready_helpers_test.go b/test/integration/minready_helpers_test.go index 415736c3..b2158ff0 100644 --- a/test/integration/minready_helpers_test.go +++ b/test/integration/minready_helpers_test.go @@ -116,18 +116,13 @@ func newInflatedIntegrationDeployment() *apps.Deployment { deployment := newIntegrationDeployment() progressDeadlineSeconds := partitiondeployment.InflatedProgressDeadlineSeconds maxUnavailable := intstr.FromInt(0) - maxSurge := intstr.FromInt(int(partitiondeployment.InflatedMaxSurgeInt)) deployment.Spec.MinReadySeconds = partitiondeployment.InflatedMinReadySeconds deployment.Spec.ProgressDeadlineSeconds = &progressDeadlineSeconds - deployment.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{ - MaxUnavailable: &maxUnavailable, - MaxSurge: &maxSurge, - } + deployment.Spec.Strategy.RollingUpdate.MaxUnavailable = &maxUnavailable deployment.Annotations = map[string]string{ partitiondeployment.AnnotationOriginalMinReadySeconds: "5", partitiondeployment.AnnotationOriginalProgressDeadlineSeconds: "60", partitiondeployment.AnnotationOriginalMaxUnavailable: "25%", - partitiondeployment.AnnotationOriginalMaxSurge: "1", } return deployment } @@ -237,6 +232,7 @@ func newIntegrationMinReadyControl( Finalize() error } { return partitionstyle.NewControlPlane( + context.Background(), partitiondeployment.NewMinReadyController, cli, recorder, @@ -268,9 +264,6 @@ func assertInflatedDeployment(t *testing.T, deployment *apps.Deployment) { if unavailable := 
deployment.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.IntVal != 0 { t.Fatalf("maxUnavailable = %v, want 0", unavailable) } - if surge := deployment.Spec.Strategy.RollingUpdate.MaxSurge; surge == nil || surge.IntVal != partitiondeployment.InflatedMaxSurgeInt { - t.Fatalf("maxSurge = %v, want %d", surge, partitiondeployment.InflatedMaxSurgeInt) - } } func assertOriginalAnnotation(t *testing.T, deployment *apps.Deployment, key, want string) { From 5be167f13b5ba1fdd4f6fed95bfaff612ff1e9ff Mon Sep 17 00:00:00 2001 From: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> Date: Sun, 14 Jun 2026 17:46:31 +0800 Subject: [PATCH 07/22] fix: clear rollingUpdate for deployment recreate patch Signed-off-by: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> --- .../control/partitionstyle/deployment/control.go | 2 +- .../partitionstyle/deployment/control_test.go | 1 + pkg/util/patch/patch_utils.go | 15 +++++++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/control.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/control.go index da3690a7..669c56e2 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/control.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/control.go @@ -117,7 +117,7 @@ func (rc *realController) Initialize(ctx context.Context, release *v1beta1.Batch // Disable the native deployment controller patchData.UpdatePaused(true) - patchData.UpdateStrategy(apps.DeploymentStrategy{Type: apps.RecreateDeploymentStrategyType}) + patchData.UpdateRecreateStrategy() return rc.client.Patch(ctx, d, patchData) } diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/control_test.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/control_test.go index 59ddc56d..59234409 100644 --- 
a/pkg/controller/batchrelease/control/partitionstyle/deployment/control_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/control_test.go @@ -327,6 +327,7 @@ func TestRealController(t *testing.T) { Expect(cli.Get(context.TODO(), deploymentKey, fetch)).NotTo(HaveOccurred()) Expect(fetch.Spec.Paused).Should(BeTrue()) Expect(fetch.Spec.Strategy.Type).Should(Equal(apps.RecreateDeploymentStrategyType)) + Expect(fetch.Spec.Strategy.RollingUpdate).Should(BeNil()) Expect(fetch.Annotations[util.BatchReleaseControlAnnotation]).Should(Equal(getControlInfo(release))) strategy := util.GetDeploymentStrategy(fetch) Expect(strategy.Paused).Should(BeFalse()) diff --git a/pkg/util/patch/patch_utils.go b/pkg/util/patch/patch_utils.go index 25b8da15..e07b603b 100644 --- a/pkg/util/patch/patch_utils.go +++ b/pkg/util/patch/patch_utils.go @@ -214,6 +214,21 @@ func (s *DeploymentPatch) UpdateStrategy(strategy apps.DeploymentStrategy) *Depl return s } +func (s *DeploymentPatch) UpdateRecreateStrategy() *DeploymentPatch { + switch s.PatchType { + case types.StrategicMergePatchType, types.MergePatchType: + if _, ok := s.PatchData["spec"]; !ok { + s.PatchData["spec"] = make(map[string]interface{}) + } + spec := s.PatchData["spec"].(map[string]interface{}) + spec["strategy"] = map[string]interface{}{ + "type": apps.RecreateDeploymentStrategyType, + "rollingUpdate": nil, + } + } + return s +} + func (s *DeploymentPatch) UpdatePaused(paused bool) *DeploymentPatch { switch s.PatchType { case types.StrategicMergePatchType, types.MergePatchType: From 418b73037cfc629cc63817d36f6495151d36d8a7 Mon Sep 17 00:00:00 2001 From: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> Date: Sun, 14 Jun 2026 18:19:09 +0800 Subject: [PATCH 08/22] test: improve minready coverage Signed-off-by: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> --- .../partitionstyle/control_plane_test.go | 559 ++++++++++++++++++ pkg/util/patch/patch_utils_test.go | 144 +++++ 2 files changed, 
703 insertions(+) create mode 100644 pkg/controller/batchrelease/control/partitionstyle/control_plane_test.go diff --git a/pkg/controller/batchrelease/control/partitionstyle/control_plane_test.go b/pkg/controller/batchrelease/control/partitionstyle/control_plane_test.go new file mode 100644 index 00000000..a1f3ac99 --- /dev/null +++ b/pkg/controller/batchrelease/control/partitionstyle/control_plane_test.go @@ -0,0 +1,559 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package partitionstyle + +import ( + "context" + "errors" + "testing" + + apps "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/tools/record" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + "github.com/openkruise/rollouts/api/v1alpha1" + "github.com/openkruise/rollouts/api/v1beta1" + batchcontext "github.com/openkruise/rollouts/pkg/controller/batchrelease/context" + controlpkg "github.com/openkruise/rollouts/pkg/controller/batchrelease/control" + "github.com/openkruise/rollouts/pkg/util" +) + +type fakePartitionController struct { + buildResult Interface + buildErr error + minReady bool + + workloadInfo *util.WorkloadInfo + pods []*corev1.Pod + listErr error + batchCtx *batchcontext.BatchContext + calcErr error + + initErr error + upgradeErr error + finalizeErr error + + buildCalls int + initCalls int + upgradeCalls int + finalizeCalls int + calculateCalls int + listCalls int +} + +func (f *fakePartitionController) BuildController() (Interface, error) { + f.buildCalls++ + if f.buildErr != nil { + return nil, f.buildErr + } + if f.buildResult != nil { + return f.buildResult, nil + } + return f, nil +} + +func (f *fakePartitionController) GetWorkloadInfo() *util.WorkloadInfo { + if f.workloadInfo != nil { + return f.workloadInfo + } + return testWorkloadInfo(3, 1, "stable", "update") +} + +func (f *fakePartitionController) ListOwnedPods() ([]*corev1.Pod, error) { + f.listCalls++ + return f.pods, f.listErr +} + +func (f *fakePartitionController) CalculateBatchContext(*v1beta1.BatchRelease) (*batchcontext.BatchContext, error) { + f.calculateCalls++ + if f.calcErr != nil { + return nil, f.calcErr + } + if f.batchCtx != nil { + return f.batchCtx, nil + } + return 
readyBatchContext(), nil +} + +func (f *fakePartitionController) Initialize(context.Context, *v1beta1.BatchRelease) error { + f.initCalls++ + return f.initErr +} + +func (f *fakePartitionController) UpgradeBatch(context.Context, *batchcontext.BatchContext) error { + f.upgradeCalls++ + return f.upgradeErr +} + +func (f *fakePartitionController) Finalize(context.Context, *v1beta1.BatchRelease) error { + f.finalizeCalls++ + return f.finalizeErr +} + +func (f *fakePartitionController) IsMinReadyControl() bool { + return f.minReady +} + +type fakeBatchLabelPatcher struct { + calls int + err error +} + +func (f *fakeBatchLabelPatcher) PatchPodBatchLabel(*batchcontext.BatchContext) error { + f.calls++ + return f.err +} + +func TestNewControlPlaneCopiesReleaseAndNormalizesContext(t *testing.T) { + release := testBatchRelease() + status := &v1beta1.BatchReleaseStatus{} + controller := &fakePartitionController{} + rc := NewControlPlane(nil, func(client.Client, types.NamespacedName, schema.GroupVersionKind) Interface { + return controller + }, fake.NewClientBuilder().Build(), record.NewFakeRecorder(10), release, status, types.NamespacedName{}, schema.GroupVersionKind{}) + + if rc.ctx == nil { + t.Fatalf("ctx is nil, want background context") + } + if rc.release == release { + t.Fatalf("release was not deep-copied") + } + release.Name = "changed" + if rc.release.Name == "changed" { + t.Fatalf("release mutation leaked into control plane copy") + } + + ctx := context.WithValue(context.Background(), struct{}{}, "value") + if nonNilContext(ctx) != ctx { + t.Fatalf("nonNilContext did not preserve non-nil context") + } +} + +func TestControlPlaneInitializeRecordsMinReadyWorkloadInfo(t *testing.T) { + controller := &fakePartitionController{ + minReady: true, + workloadInfo: testWorkloadInfo(5, 2, "stable", "update"), + } + status := &v1beta1.BatchReleaseStatus{} + rc := newTestControlPlane(controller, status) + + if err := rc.Initialize(); err != nil { + t.Fatalf("Initialize() error 
= %v", err) + } + if controller.initCalls != 1 { + t.Fatalf("initCalls = %d, want 1", controller.initCalls) + } + if status.StableRevision != "stable" || status.UpdateRevision != "update" || status.ObservedWorkloadReplicas != 5 { + t.Fatalf("status revisions/replicas not updated: %#v", status) + } + condition := util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyInitialized) + if condition == nil || condition.Reason != "MinReadyInitialized" { + t.Fatalf("MinReadyInitialized condition = %#v", condition) + } +} + +func TestControlPlaneUpgradeBatchMinReadyPaths(t *testing.T) { + t.Run("no replicas records ready without upgrading", func(t *testing.T) { + controller := &fakePartitionController{ + minReady: true, + workloadInfo: testWorkloadInfo(0, 0, "stable", "update"), + } + status := &v1beta1.BatchReleaseStatus{} + rc := newTestControlPlane(controller, status) + + if err := rc.UpgradeBatch(); err != nil { + t.Fatalf("UpgradeBatch() error = %v", err) + } + if controller.upgradeCalls != 0 { + t.Fatalf("upgradeCalls = %d, want 0", controller.upgradeCalls) + } + condition := util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyBatching) + if condition == nil || condition.Reason != "MinReadyBatching" { + t.Fatalf("MinReadyBatching condition = %#v", condition) + } + }) + + t.Run("successful upgrade patches labels and records normal condition", func(t *testing.T) { + controller := &fakePartitionController{minReady: true} + patcher := &fakeBatchLabelPatcher{} + status := &v1beta1.BatchReleaseStatus{} + rc := newTestControlPlane(controller, status) + rc.patcher = patcher + + if err := rc.UpgradeBatch(); err != nil { + t.Fatalf("UpgradeBatch() error = %v", err) + } + if controller.calculateCalls != 1 || controller.upgradeCalls != 1 || patcher.calls != 1 { + t.Fatalf("calls calculate=%d upgrade=%d patch=%d, want 1/1/1", controller.calculateCalls, controller.upgradeCalls, patcher.calls) + } + condition := 
util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyBatching) + if condition == nil || condition.Reason != "MinReadyBatching" { + t.Fatalf("MinReadyBatching condition = %#v", condition) + } + }) + + t.Run("calculate error records degraded condition", func(t *testing.T) { + controller := &fakePartitionController{ + minReady: true, + calcErr: errors.Join(errors.New("strategy drift"), ErrMinReadyDriftDetected), + } + status := &v1beta1.BatchReleaseStatus{} + rc := newTestControlPlane(controller, status) + + if err := rc.UpgradeBatch(); err == nil { + t.Fatalf("UpgradeBatch() error = nil, want error") + } + condition := util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyDegraded) + if condition == nil || condition.Reason != "MinReadyDegradedDriftDetected" { + t.Fatalf("MinReadyDegraded condition = %#v", condition) + } + if status.Message == "" { + t.Fatalf("status.Message is empty, want degraded error") + } + }) +} + +func TestControlPlaneEnsureBatchPodsReadyAndLabeled(t *testing.T) { + t.Run("not ready returns readiness error", func(t *testing.T) { + controller := &fakePartitionController{ + minReady: true, + batchCtx: &batchcontext.BatchContext{ + Replicas: 3, + DesiredUpdatedReplicas: 3, + UpdatedReplicas: 1, + }, + } + now := metav1.Now() + status := &v1beta1.BatchReleaseStatus{ + Conditions: []v1beta1.RolloutCondition{{ + Type: v1beta1.RolloutConditionMinReadyBatching, + Status: corev1.ConditionTrue, + LastTransitionTime: now, + }}, + } + rc := newTestControlPlane(controller, status) + + if err := rc.EnsureBatchPodsReadyAndLabeled(); err == nil { + t.Fatalf("EnsureBatchPodsReadyAndLabeled() error = nil, want not ready error") + } + if controller.calculateCalls != 1 { + t.Fatalf("calculateCalls = %d, want 1", controller.calculateCalls) + } + }) + + t.Run("ready records batch ready", func(t *testing.T) { + controller := &fakePartitionController{minReady: true} + status := &v1beta1.BatchReleaseStatus{} + rc := 
newTestControlPlane(controller, status) + + if err := rc.EnsureBatchPodsReadyAndLabeled(); err != nil { + t.Fatalf("EnsureBatchPodsReadyAndLabeled() error = %v", err) + } + condition := util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyBatching) + if condition == nil || condition.Reason != "MinReadyBatchReady" { + t.Fatalf("MinReadyBatchReady condition = %#v", condition) + } + }) +} + +func TestControlPlaneFinalizeMinReadyPaths(t *testing.T) { + t.Run("not found is ignored", func(t *testing.T) { + controller := &fakePartitionController{ + buildErr: apierrors.NewNotFound(schema.GroupResource{Group: "apps", Resource: "deployments"}, "missing"), + } + rc := newTestControlPlane(controller, &v1beta1.BatchReleaseStatus{}) + + if err := rc.Finalize(); err != nil { + t.Fatalf("Finalize() error = %v", err) + } + }) + + t.Run("successful minReady finalize clears degraded condition", func(t *testing.T) { + controller := &fakePartitionController{minReady: true} + status := &v1beta1.BatchReleaseStatus{ + Message: "previous degraded", + Conditions: []v1beta1.RolloutCondition{{ + Type: v1beta1.RolloutConditionMinReadyDegraded, + Status: corev1.ConditionTrue, + Reason: "MinReadyDegradedDriftDetected", + }}, + } + rc := newTestControlPlane(controller, status) + + if err := rc.Finalize(); err != nil { + t.Fatalf("Finalize() error = %v", err) + } + if controller.finalizeCalls != 1 { + t.Fatalf("finalizeCalls = %d, want 1", controller.finalizeCalls) + } + finalized := util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyFinalized) + if finalized == nil || finalized.Reason != "MinReadyFinalized" { + t.Fatalf("MinReadyFinalized condition = %#v", finalized) + } + degraded := util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyDegraded) + if degraded == nil || degraded.Status != corev1.ConditionFalse { + t.Fatalf("MinReadyDegraded condition = %#v, want false", degraded) + } + if status.Message != "" { + t.Fatalf("status.Message = 
%q, want empty", status.Message) + } + }) +} + +func TestControlPlaneSyncWorkloadInformationStates(t *testing.T) { + tests := []struct { + name string + release func() *v1beta1.BatchRelease + controller *fakePartitionController + wantEvent controlpkg.WorkloadEventType + wantErr bool + }{ + { + name: "deleted release is ignored", + release: func() *v1beta1.BatchRelease { + release := testBatchRelease() + now := metav1.Now() + release.DeletionTimestamp = &now + return release + }, + controller: &fakePartitionController{}, + wantEvent: controlpkg.WorkloadNormalState, + }, + { + name: "workload gone", + controller: &fakePartitionController{ + buildErr: apierrors.NewNotFound(schema.GroupResource{Group: "apps", Resource: "deployments"}, "missing"), + }, + wantEvent: controlpkg.WorkloadHasGone, + wantErr: true, + }, + { + name: "build error", + controller: &fakePartitionController{buildErr: errors.New("build failed")}, + wantEvent: controlpkg.WorkloadUnknownState, + wantErr: true, + }, + { + name: "still reconciling", + controller: &fakePartitionController{workloadInfo: &util.WorkloadInfo{ + LogKey: "workload", + ObjectMeta: metav1.ObjectMeta{ + Generation: 2, + }, + Replicas: 5, + Status: util.WorkloadStatus{ + Replicas: 5, + UpdatedReplicas: 2, + ObservedGeneration: 1, + StableRevision: "stable", + UpdateRevision: "update", + }, + }}, + wantEvent: controlpkg.WorkloadStillReconciling, + }, + { + name: "promoted", + controller: &fakePartitionController{workloadInfo: testWorkloadInfo(5, 5, "stable", "update")}, + wantEvent: controlpkg.WorkloadNormalState, + }, + { + name: "scaling", + controller: &fakePartitionController{workloadInfo: testWorkloadInfo(6, 2, "stable", "update")}, + wantEvent: controlpkg.WorkloadReplicasChanged, + }, + { + name: "rollback", + controller: &fakePartitionController{workloadInfo: testWorkloadInfo(5, 2, "stable", "stable")}, + wantEvent: controlpkg.WorkloadRollbackInBatch, + }, + { + name: "revision changed", + controller: 
&fakePartitionController{workloadInfo: testWorkloadInfo(5, 2, "stable", "other")}, + wantEvent: controlpkg.WorkloadPodTemplateChanged, + }, + { + name: "normal", + controller: &fakePartitionController{workloadInfo: testWorkloadInfo(5, 2, "stable", "update")}, + wantEvent: controlpkg.WorkloadNormalState, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + status := &v1beta1.BatchReleaseStatus{ + StableRevision: "stable", + UpdateRevision: "update", + ObservedWorkloadReplicas: 5, + } + rc := newTestControlPlane(tt.controller, status) + if tt.release != nil { + rc.release = tt.release() + } + + got, info, err := rc.SyncWorkloadInformation() + if (err != nil) != tt.wantErr { + t.Fatalf("SyncWorkloadInformation() error = %v, wantErr %v", err, tt.wantErr) + } + if got != tt.wantEvent { + t.Fatalf("event = %s, want %s", got, tt.wantEvent) + } + if tt.name == "deleted release is ignored" && info != nil { + t.Fatalf("info = %#v, want nil for deleted release", info) + } + }) + } +} + +func TestControlPlaneNoNeedUpdateReplicaHelpers(t *testing.T) { + t.Run("rollback without rollout id returns current updated replicas", func(t *testing.T) { + controller := &fakePartitionController{} + status := &v1beta1.BatchReleaseStatus{ + CanaryStatus: v1beta1.BatchReleaseCanaryStatus{UpdatedReplicas: 2}, + } + rc := newTestControlPlane(controller, status) + rc.release.Annotations = map[string]string{v1alpha1.RollbackInBatchAnnotation: "true"} + + got, err := rc.markNoNeedUpdatePodsIfNeeds() + if err != nil { + t.Fatalf("markNoNeedUpdatePodsIfNeeds() error = %v", err) + } + if got == nil || *got != 2 { + t.Fatalf("noNeedUpdateReplicas = %v, want 2", got) + } + }) + + t.Run("count refreshes status from matching pods", func(t *testing.T) { + noNeed := int32(0) + controller := &fakePartitionController{ + pods: []*corev1.Pod{ + testPod("matched", map[string]string{ + apps.ControllerRevisionHashLabelKey: "hash", + util.NoNeedUpdatePodLabel: "rollout-1", + }), + 
testPod("different-rollout", map[string]string{ + apps.ControllerRevisionHashLabelKey: "hash", + util.NoNeedUpdatePodLabel: "rollout-2", + }), + testPod("different-revision", map[string]string{ + apps.ControllerRevisionHashLabelKey: "old", + util.NoNeedUpdatePodLabel: "rollout-1", + }), + }, + } + status := &v1beta1.BatchReleaseStatus{ + UpdateRevision: "hash", + CanaryStatus: v1beta1.BatchReleaseCanaryStatus{NoNeedUpdateReplicas: &noNeed}, + } + rc := newTestControlPlane(controller, status) + rc.release.Spec.ReleasePlan.RolloutID = "rollout-1" + rc.release.Status.UpdateRevision = "hash" + rc.release.Status.CanaryStatus.NoNeedUpdateReplicas = &noNeed + + if err := rc.countAndUpdateNoNeedUpdateReplicas(); err != nil { + t.Fatalf("countAndUpdateNoNeedUpdateReplicas() error = %v", err) + } + if *status.CanaryStatus.NoNeedUpdateReplicas != 1 { + t.Fatalf("status noNeedUpdateReplicas = %d, want 1", *status.CanaryStatus.NoNeedUpdateReplicas) + } + if *rc.release.Status.CanaryStatus.NoNeedUpdateReplicas != 1 { + t.Fatalf("release noNeedUpdateReplicas = %d, want 1", *rc.release.Status.CanaryStatus.NoNeedUpdateReplicas) + } + }) +} + +func newTestControlPlane(controller *fakePartitionController, status *v1beta1.BatchReleaseStatus) *realBatchControlPlane { + return &realBatchControlPlane{ + Interface: controller, + Client: fake.NewClientBuilder().Build(), + EventRecorder: record.NewFakeRecorder(20), + patcher: &fakeBatchLabelPatcher{}, + ctx: context.Background(), + release: testBatchRelease(), + newStatus: status, + } +} + +func testBatchRelease() *v1beta1.BatchRelease { + return &v1beta1.BatchRelease{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "default", + Name: "release", + }, + Spec: v1beta1.BatchReleaseSpec{ + ReleasePlan: v1beta1.ReleasePlan{ + Batches: []v1beta1.ReleaseBatch{ + {CanaryReplicas: intstr.FromInt(1)}, + {CanaryReplicas: intstr.FromInt(3)}, + }, + }, + }, + } +} + +func testWorkloadInfo(replicas, updatedReplicas int32, stableRevision, updateRevision 
string) *util.WorkloadInfo { + return &util.WorkloadInfo{ + LogKey: "workload", + ObjectMeta: metav1.ObjectMeta{ + Generation: 1, + }, + Replicas: replicas, + Status: util.WorkloadStatus{ + Replicas: replicas, + UpdatedReplicas: updatedReplicas, + ObservedGeneration: 1, + StableRevision: stableRevision, + UpdateRevision: updateRevision, + }, + } +} + +func readyBatchContext() *batchcontext.BatchContext { + return &batchcontext.BatchContext{ + Replicas: 3, + CurrentBatch: 0, + UpdatedReplicas: 1, + UpdatedReadyReplicas: 1, + PlannedUpdatedReplicas: 1, + DesiredUpdatedReplicas: 1, + DesiredPartition: intstr.FromInt(2), + CurrentPartition: intstr.FromInt(3), + NoNeedUpdatedReplicas: nil, + FailureThreshold: nil, + Pods: nil, + UpdateRevision: "update", + RolloutID: "", + DesiredSurge: intstr.FromInt(0), + CurrentSurge: intstr.FromInt(0), + } +} + +func testPod(name string, labels map[string]string) *corev1.Pod { + return &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "default", + Name: name, + Labels: labels, + }, + } +} diff --git a/pkg/util/patch/patch_utils_test.go b/pkg/util/patch/patch_utils_test.go index ae806847..e8bb8faf 100644 --- a/pkg/util/patch/patch_utils_test.go +++ b/pkg/util/patch/patch_utils_test.go @@ -17,11 +17,15 @@ limitations under the License. package patch import ( + "encoding/json" "fmt" "reflect" "testing" + apps "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" "github.com/openkruise/rollouts/pkg/util" ) @@ -43,3 +47,143 @@ func TestCommonPatch(t *testing.T) { t.Fatalf("Not equal: \n%s \n%s", expectedPatchBody, patchReq.String()) } } + +func TestMergePatchHelpers(t *testing.T) { + patchReq := NewMergePatch(). + OverrideFinalizer([]string{"finalizer-a"}). + InsertLabel("label-a", "value-a"). + DeleteLabel("label-b"). + InsertAnnotation("annotation-a", "value-b"). 
+ DeleteAnnotation("annotation-b") + + if patchReq.Type() != types.MergePatchType { + t.Fatalf("Type() = %s, want %s", patchReq.Type(), types.MergePatchType) + } + data, err := patchReq.Data(nil) + if err != nil { + t.Fatalf("Data() error = %v", err) + } + if string(data) != patchReq.String() { + t.Fatalf("Data() = %s, want %s", string(data), patchReq.String()) + } + + var got map[string]interface{} + if err := json.Unmarshal(data, &got); err != nil { + t.Fatalf("patch json is malformed: %v", err) + } + metadata := got["metadata"].(map[string]interface{}) + finalizers := metadata["finalizers"].([]interface{}) + if finalizers[0] != "finalizer-a" { + t.Fatalf("finalizers = %v, want finalizer-a", finalizers) + } + labels := metadata["labels"].(map[string]interface{}) + if labels["label-a"] != "value-a" || labels["label-b"] != nil { + t.Fatalf("labels = %v", labels) + } + annotations := metadata["annotations"].(map[string]interface{}) + if annotations["annotation-a"] != "value-b" || annotations["annotation-b"] != nil { + t.Fatalf("annotations = %v", annotations) + } +} + +func TestDeploymentPatchHelpers(t *testing.T) { + progressDeadlineSeconds := int32(600) + maxSurge := intstr.FromString("25%") + maxUnavailable := intstr.FromInt(1) + strategy := apps.DeploymentStrategy{Type: apps.RollingUpdateDeploymentStrategyType} + + strategyPatch := NewDeploymentPatch().UpdateStrategy(strategy) + var strategyOnly map[string]interface{} + if err := json.Unmarshal([]byte(strategyPatch.String()), &strategyOnly); err != nil { + t.Fatalf("strategy patch json is malformed: %v", err) + } + if strategyOnly["spec"].(map[string]interface{})["strategy"].(map[string]interface{})["type"] != string(apps.RollingUpdateDeploymentStrategyType) { + t.Fatalf("strategy patch = %v", strategyOnly) + } + + patchReq := NewDeploymentPatch(). + UpdatePaused(true). + UpdateMinReadySeconds(30). + UpdateProgressDeadlineSeconds(&progressDeadlineSeconds). + UpdateMaxSurge(&maxSurge). 
+ UpdateMaxUnavailable(&maxUnavailable) + + var got map[string]interface{} + if err := json.Unmarshal([]byte(patchReq.String()), &got); err != nil { + t.Fatalf("patch json is malformed: %v", err) + } + spec := got["spec"].(map[string]interface{}) + if spec["paused"] != true { + t.Fatalf("paused = %v, want true", spec["paused"]) + } + if spec["minReadySeconds"] != float64(30) { + t.Fatalf("minReadySeconds = %v, want 30", spec["minReadySeconds"]) + } + if spec["progressDeadlineSeconds"] != float64(600) { + t.Fatalf("progressDeadlineSeconds = %v, want 600", spec["progressDeadlineSeconds"]) + } + rollingUpdate := spec["strategy"].(map[string]interface{})["rollingUpdate"].(map[string]interface{}) + if rollingUpdate["maxSurge"] != "25%" { + t.Fatalf("maxSurge = %v", rollingUpdate["maxSurge"]) + } + if rollingUpdate["maxUnavailable"] != float64(1) { + t.Fatalf("maxUnavailable = %v", rollingUpdate["maxUnavailable"]) + } +} + +func TestDeploymentPatchUpdateRecreateStrategyClearsRollingUpdate(t *testing.T) { + patchReq := NewDeploymentPatch().UpdateRecreateStrategy() + + var got map[string]interface{} + if err := json.Unmarshal([]byte(patchReq.String()), &got); err != nil { + t.Fatalf("patch json is malformed: %v", err) + } + strategy := got["spec"].(map[string]interface{})["strategy"].(map[string]interface{}) + if strategy["type"] != string(apps.RecreateDeploymentStrategyType) { + t.Fatalf("strategy.type = %v, want Recreate", strategy["type"]) + } + if _, ok := strategy["rollingUpdate"]; !ok { + t.Fatalf("rollingUpdate field missing, want explicit null") + } + if strategy["rollingUpdate"] != nil { + t.Fatalf("rollingUpdate = %v, want nil", strategy["rollingUpdate"]) + } +} + +func TestClonesetPatchHelpers(t *testing.T) { + partition := intstr.FromInt(3) + maxSurge := intstr.FromString("20%") + maxUnavailable := intstr.FromInt(1) + + patchReq := NewClonesetPatch(). + UpdateMinReadySeconds(10). + UpdatePaused(true). + UpdatePartiton(&partition). + UpdateMaxSurge(&maxSurge). 
+ UpdateMaxUnavailable(&maxUnavailable) + + if patchReq.Type() != types.MergePatchType { + t.Fatalf("Type() = %s, want %s", patchReq.Type(), types.MergePatchType) + } + var got map[string]interface{} + if err := json.Unmarshal([]byte(patchReq.String()), &got); err != nil { + t.Fatalf("patch json is malformed: %v", err) + } + spec := got["spec"].(map[string]interface{}) + if spec["minReadySeconds"] != float64(10) { + t.Fatalf("minReadySeconds = %v, want 10", spec["minReadySeconds"]) + } + updateStrategy := spec["updateStrategy"].(map[string]interface{}) + if updateStrategy["paused"] != true { + t.Fatalf("paused = %v, want true", updateStrategy["paused"]) + } + if updateStrategy["partition"] != float64(3) { + t.Fatalf("partition = %v", updateStrategy["partition"]) + } + if updateStrategy["maxSurge"] != "20%" { + t.Fatalf("maxSurge = %v", updateStrategy["maxSurge"]) + } + if updateStrategy["maxUnavailable"] != float64(1) { + t.Fatalf("maxUnavailable = %v", updateStrategy["maxUnavailable"]) + } +} From 0e1e77520b7c94ef599d8684540b49e1d0125677 Mon Sep 17 00:00:00 2001 From: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> Date: Sun, 14 Jun 2026 19:17:12 +0800 Subject: [PATCH 09/22] fix: refresh minready annotations on continuous release Signed-off-by: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> --- ...t-minready-seconds-progressive-delivery.md | 7 +-- .../deployment/minready_control.go | 40 ++++++++++++++-- .../deployment/minready_control_test.go | 16 +++++++ .../mutating/workload_update_handler.go | 11 ++++- .../mutating/workload_update_handler_test.go | 48 +++++++++++++++++++ 5 files changed, 113 insertions(+), 9 deletions(-) diff --git a/docs/proposals/20260315-deployment-minready-seconds-progressive-delivery.md b/docs/proposals/20260315-deployment-minready-seconds-progressive-delivery.md index c94cef7b..6d9f9522 100644 --- a/docs/proposals/20260315-deployment-minready-seconds-progressive-delivery.md +++ 
b/docs/proposals/20260315-deployment-minready-seconds-progressive-delivery.md @@ -297,7 +297,8 @@ func NewMinReadyController(cli client.Client, key types.NamespacedName, gvk sche - PDB presence is not an eligibility failure. PDBs protect Eviction API flows, not Deployment rolling updates, so they are not used as the batch-safety mechanism. 2. **Annotation persistence** (`writeOriginalAnnotations`): - - If any of the three annotations is already present, validate that all three exist (idempotency check) and that the on-disk fields are already inflated. If consistent, no-op. + - If any of the three annotations is already present, validate that all three exist (idempotency check). If the on-disk fields are already inflated, no-op. + - If a continuous release supplies new user-owned `minReadySeconds` / `progressDeadlineSeconds` while annotations already exist, refresh those original annotations before re-inflating. - Otherwise, serialize the current values of `minReadySeconds`, `progressDeadlineSeconds`, and `maxUnavailable` per the serialization rules above and write all three annotations. 3. **Field inflation** (`inflateDeploymentStrategy`): Set `minReadySeconds`, `progressDeadlineSeconds`, and `maxUnavailable` to their MinReadySeconds values. Leave `maxSurge` unchanged. @@ -422,8 +423,8 @@ if isMinReadySecondsStrategy(rollout, deployment) { // MinReady keeps the native controller running, so it must NOT be paused. // Inflate synchronously at admission time so the native controller never // observes the user's original budget in the window between admission and - // MinReadyControl.Initialize. Initialize stays the fallback and validates - // (instead of rewriting) annotations that already exist. + // MinReadyControl.Initialize. Continuous releases refresh user-owned + // availability annotations before re-inflation. if err := partitiondeployment.EnrollMinReadyDeployment(newObj); err != nil { klog.Warningf("Skip MinReady enrollment for Deployment(%s/%s): %v", ...) 
} diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go index ec57c0e9..128d959c 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go @@ -194,12 +194,19 @@ func ensureOriginalAnnotations(deployment *apps.Deployment) error { } func writeOriginalAnnotations(original, modified *apps.Deployment) { + if modified.Annotations == nil { + modified.Annotations = map[string]string{} + } + writeOriginalAvailabilityAnnotations(original, modified) + modified.Annotations[AnnotationOriginalMaxUnavailable] = serializeOriginalIntOrString(originalMaxUnavailable(original)) +} + +func writeOriginalAvailabilityAnnotations(original, modified *apps.Deployment) { if modified.Annotations == nil { modified.Annotations = map[string]string{} } modified.Annotations[AnnotationOriginalMinReadySeconds] = serializeOriginalInt32(&original.Spec.MinReadySeconds) modified.Annotations[AnnotationOriginalProgressDeadlineSeconds] = serializeOriginalInt32(original.Spec.ProgressDeadlineSeconds) - modified.Annotations[AnnotationOriginalMaxUnavailable] = serializeOriginalIntOrString(originalMaxUnavailable(original)) } func originalMaxUnavailable(deployment *apps.Deployment) *intstr.IntOrString { @@ -227,8 +234,9 @@ func inflateDeploymentStrategy(deployment *apps.Deployment) { // annotations and inflates them in place. The workload mutating webhook calls // it when a Deployment enters rollout progressing, so the native controller // never observes the original maxUnavailable/minReadySeconds budget between -// admission and MinReadyControl.Initialize; Initialize stays the fallback and -// validates (instead of rewriting) annotations that already exist. +// admission and MinReadyControl.Initialize. 
If a continuous release updates the +// user-owned availability fields while MinReady annotations already exist, +// enrollment refreshes those original annotations before re-inflating. func EnrollMinReadyDeployment(deployment *apps.Deployment) error { if err := validateDeploymentStrategyType(deployment); err != nil { return err @@ -239,7 +247,13 @@ func EnrollMinReadyDeployment(deployment *apps.Deployment) error { return err } if err := validateInflatedDeploymentStrategy(snapshot); err != nil { - return err + if !hasOriginalAvailabilityChange(snapshot) { + return err + } + if err := validateMinReadyRefreshableDeployment(snapshot); err != nil { + return err + } + writeOriginalAvailabilityAnnotations(snapshot, deployment) } } else { writeOriginalAnnotations(snapshot, deployment) @@ -289,6 +303,24 @@ func validateInflatedDeploymentStrategy(deployment *apps.Deployment) error { return nil } +func hasOriginalAvailabilityChange(deployment *apps.Deployment) bool { + if deployment.Spec.MinReadySeconds != InflatedMinReadySeconds { + return true + } + return deployment.Spec.ProgressDeadlineSeconds == nil || + *deployment.Spec.ProgressDeadlineSeconds != InflatedProgressDeadlineSeconds +} + +func validateMinReadyRefreshableDeployment(deployment *apps.Deployment) error { + if deployment.Spec.Paused { + return fmt.Errorf("%w: deployment is paused", partitionstyle.ErrMinReadyDriftDetected) + } + if deployment.Spec.Strategy.RollingUpdate == nil { + return fmt.Errorf("%w: rollingUpdate is nil", partitionstyle.ErrMinReadyDriftDetected) + } + return nil +} + func validateDeploymentStrategyType(deployment *apps.Deployment) error { if deployment.Spec.Strategy.Type != apps.RollingUpdateDeploymentStrategyType { return fmt.Errorf("%w: deployment strategy type %s is not RollingUpdate", diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go index 
87ee6985..30cd8cfb 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go @@ -509,6 +509,22 @@ func TestEnrollMinReadyDeploymentValidatesExistingAnnotations(t *testing.T) { } } +func TestEnrollMinReadyDeploymentRefreshesAvailabilityAnnotationsForContinuousRelease(t *testing.T) { + deployment := newInflatedMinReadyDeployment() + addMinReadyOriginalAnnotations(deployment) + deployment.Spec.MinReadySeconds = 9 + deployment.Spec.ProgressDeadlineSeconds = pointer.Int32(90) + + if err := EnrollMinReadyDeployment(deployment); err != nil { + t.Fatalf("EnrollMinReadyDeployment failed: %v", err) + } + + assertAnnotation(t, deployment.Annotations, AnnotationOriginalMinReadySeconds, "9") + assertAnnotation(t, deployment.Annotations, AnnotationOriginalProgressDeadlineSeconds, "90") + assertAnnotation(t, deployment.Annotations, AnnotationOriginalMaxUnavailable, "25%") + assertMinReadyInflated(t, deployment) +} + func TestEnrollMinReadyDeploymentRejectsRecreate(t *testing.T) { deployment := newMinReadyDeployment() deployment.Spec.Strategy.Type = apps.RecreateDeploymentStrategyType diff --git a/pkg/webhook/workload/mutating/workload_update_handler.go b/pkg/webhook/workload/mutating/workload_update_handler.go index c58a701f..ed39bceb 100644 --- a/pkg/webhook/workload/mutating/workload_update_handler.go +++ b/pkg/webhook/workload/mutating/workload_update_handler.go @@ -242,6 +242,13 @@ func (h *WorkloadHandler) handleDeployment(newObj, oldObj *apps.Deployment) (boo if newObj.Annotations[util.InRolloutProgressingAnnotation] != "" { modified := false if isMinReadySecondsStrategy(rollout, newObj) { + if isEffectiveDeploymentRevisionChange(oldObj, newObj) { + if err := partitiondeployment.EnrollMinReadyDeployment(newObj); err != nil { + klog.Warningf("Skip MinReady continuous enrollment for Deployment(%s/%s): %v", newObj.Namespace, newObj.Name, err) + 
return enforceMinReadyInflation(newObj), nil + } + return true, nil + } return enforceMinReadyInflation(newObj), nil } strategy := util.GetDeploymentStrategy(newObj) @@ -335,8 +342,8 @@ func (h *WorkloadHandler) handleDeployment(newObj, oldObj *apps.Deployment) (boo // Inflate the strategy synchronously at admission time: this snapshots the // original fields into annotations and sets minReadySeconds/maxUnavailable // so the native controller never observes the user's original budget in the - // window between admission and MinReadyControl.Initialize. Initialize stays - // the fallback and validates (instead of rewriting) annotations that exist. + // window between admission and MinReadyControl.Initialize. Continuous + // releases refresh user-owned availability annotations before re-inflation. if err := partitiondeployment.EnrollMinReadyDeployment(newObj); err != nil { // Do not block admission; the controller's Initialize will surface a // degraded condition for an unsupported strategy instead. 
diff --git a/pkg/webhook/workload/mutating/workload_update_handler_test.go b/pkg/webhook/workload/mutating/workload_update_handler_test.go index ee2e3cc4..151b8fce 100644 --- a/pkg/webhook/workload/mutating/workload_update_handler_test.go +++ b/pkg/webhook/workload/mutating/workload_update_handler_test.go @@ -646,6 +646,54 @@ func TestHandlerDeployment(t *testing.T) { return rolloutDemo.DeepCopy() }, }, + { + name: "minready continuous release refreshes original availability annotations", + getObjs: func() (*apps.Deployment, *apps.Deployment) { + oldObj := deploymentDemo.DeepCopy() + oldObj.Spec.Template.Spec.Containers[0].Image = "echoserver:v2" + oldObj.Annotations[util.InRolloutProgressingAnnotation] = `{"rolloutName":"rollout-demo","RolloutDone":false}` + oldObj.Annotations[partitiondeployment.AnnotationOriginalMinReadySeconds] = "7" + oldObj.Annotations[partitiondeployment.AnnotationOriginalProgressDeadlineSeconds] = "60" + oldObj.Annotations[partitiondeployment.AnnotationOriginalMaxUnavailable] = "25%" + oldObj.Spec.Paused = false + oldObj.Spec.Strategy.Type = apps.RollingUpdateDeploymentStrategyType + maxUnavailable := intstr.FromInt(0) + oldObj.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{MaxUnavailable: &maxUnavailable} + oldObj.Spec.MinReadySeconds = partitiondeployment.InflatedMinReadySeconds + inflatedPDS := partitiondeployment.InflatedProgressDeadlineSeconds + oldObj.Spec.ProgressDeadlineSeconds = &inflatedPDS + + newObj := oldObj.DeepCopy() + newObj.Spec.Template.Spec.Containers[0].Image = "echoserver:v3" + newObj.Spec.MinReadySeconds = 9 + newObj.Spec.ProgressDeadlineSeconds = pointer.Int32(90) + return oldObj, newObj + }, + expectObj: func() *apps.Deployment { + obj := deploymentDemo.DeepCopy() + obj.Spec.Template.Spec.Containers[0].Image = "echoserver:v3" + obj.Annotations[util.InRolloutProgressingAnnotation] = `{"rolloutName":"rollout-demo","RolloutDone":false}` + 
obj.Annotations[partitiondeployment.AnnotationOriginalMinReadySeconds] = "9" + obj.Annotations[partitiondeployment.AnnotationOriginalProgressDeadlineSeconds] = "90" + obj.Annotations[partitiondeployment.AnnotationOriginalMaxUnavailable] = "25%" + obj.Spec.Paused = false + obj.Spec.Strategy.Type = apps.RollingUpdateDeploymentStrategyType + maxUnavailable := intstr.FromInt(0) + obj.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{MaxUnavailable: &maxUnavailable} + obj.Spec.MinReadySeconds = partitiondeployment.InflatedMinReadySeconds + inflatedPDS := partitiondeployment.InflatedProgressDeadlineSeconds + obj.Spec.ProgressDeadlineSeconds = &inflatedPDS + return obj + }, + getRs: func() []*apps.ReplicaSet { + rs := rsDemo.DeepCopy() + return []*apps.ReplicaSet{rs} + }, + getRollout: func() *appsv1beta1.Rollout { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + return rolloutDemo.DeepCopy() + }, + }, { name: "set deployment paused = false, matched rollout, in finalising, allow", getObjs: func() (*apps.Deployment, *apps.Deployment) { From 6ca3928eef414f0ac156d65638aadb1d756a1de6 Mon Sep 17 00:00:00 2001 From: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> Date: Sun, 14 Jun 2026 20:14:16 +0800 Subject: [PATCH 10/22] test: add minready continuous e2e Signed-off-by: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> --- .../e2e-deployment-minready-1.24.yaml | 109 ++++++++++++++++++ .../e2e-deployment-minready-1.26.yaml | 109 ++++++++++++++++++ .../e2e-deployment-minready-1.28.yaml | 109 ++++++++++++++++++ test/e2e/deployment_minready_actions_test.go | 24 ++++ ...ployment_minready_scenarios_helper_test.go | 28 +++++ test/e2e/deployment_minready_test.go | 22 +++- 6 files changed, 396 insertions(+), 5 deletions(-) create mode 100644 .github/workflows/e2e-deployment-minready-1.24.yaml create mode 100644 .github/workflows/e2e-deployment-minready-1.26.yaml create mode 100644 
.github/workflows/e2e-deployment-minready-1.28.yaml diff --git a/.github/workflows/e2e-deployment-minready-1.24.yaml b/.github/workflows/e2e-deployment-minready-1.24.yaml new file mode 100644 index 00000000..2f5730b5 --- /dev/null +++ b/.github/workflows/e2e-deployment-minready-1.24.yaml @@ -0,0 +1,109 @@ +name: E2E-Deployment-MinReady-1.24 + +on: + push: + branches: + - master + - release-* + pull_request: {} + workflow_dispatch: {} + +# Declare default permissions as read only. +permissions: read-all + +env: + # Common versions + GO_VERSION: '1.20' + KIND_CLUSTER_NAME: 'ci-testing' + KIND_VERSION: 'v0.14.0' + KIND_IMAGE: 'kindest/node:v1.24.6' + +jobs: + + rollout: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + submodules: true + - name: Setup Go + uses: actions/setup-go@0aaccfd150d50ccaeb58ebd88d36e91967a5f35b # v5.4.0 + with: + go-version: ${{ env.GO_VERSION }} + - name: Setup Kind Cluster + uses: helm/kind-action@a1b0e391336a6ee6713a0583f8c6240d70863de3 # v1.12.0 + with: + node_image: ${{ env.KIND_IMAGE }} + cluster_name: ${{ env.KIND_CLUSTER_NAME }} + config: ./test/kind-conf.yaml + version: ${{ env.KIND_VERSION }} + - name: Build image + run: | + export IMAGE="openkruise/kruise-rollout:e2e-${GITHUB_RUN_ID}" + docker build --pull --no-cache . 
-t $IMAGE + kind load docker-image --name=${KIND_CLUSTER_NAME} $IMAGE || { echo >&2 "kind not installed or error loading image: $IMAGE"; exit 1; } + - name: Install Kruise + run: | + set -ex + kubectl cluster-info + make helm + helm repo add openkruise https://openkruise.github.io/charts/ + helm repo update + helm install kruise openkruise/kruise --version 1.7.0 + for ((i=1;i<10;i++)); + do + set +e + PODS=$(kubectl get pod -n kruise-system | grep '1/1' | grep kruise-controller-manager | wc -l) + set -e + if [ "$PODS" -eq "2" ]; then + break + fi + sleep 3 + done + set +e + PODS=$(kubectl get pod -n kruise-system | grep '1/1' | grep kruise-controller-manager | wc -l) + set -e + if [ "$PODS" -eq "2" ]; then + echo "Wait for kruise-manager ready successfully" + else + echo "Timeout to wait for kruise-manager ready" + exit 1 + fi + - name: Install Kruise Rollout + run: | + set -ex + kubectl cluster-info + IMG=openkruise/kruise-rollout:e2e-${GITHUB_RUN_ID} ./scripts/deploy_kind.sh + for ((i=1;i<10;i++)); + do + set +e + PODS=$(kubectl get pod -n kruise-rollout | grep '1/1' | wc -l) + set -e + if [ "$PODS" -eq "1" ]; then + break + fi + sleep 3 + done + set +e + PODS=$(kubectl get pod -n kruise-rollout | grep '1/1' | wc -l) + kubectl get node -o yaml + kubectl get all -n kruise-rollout -o yaml + set -e + if [ "$PODS" -eq "1" ]; then + echo "Wait for kruise-rollout ready successfully" + else + echo "Timeout to wait for kruise-rollout ready" + exit 1 + fi + - name: Run E2E Tests For Deployment MinReadySeconds + run: | + export KUBECONFIG=/home/runner/.kube/config + make ginkgo + set +e + ./bin/ginkgo -timeout 60m -v --focus='Deployment MinReadySeconds' test/e2e + retVal=$? 
+ if [ "${retVal}" -ne 0 ];then + echo "test fail, dump kruise-rollout logs" + kubectl get pod -n kruise-rollout --no-headers | grep manager | awk '{print $1}' | xargs kubectl logs -n kruise-rollout + fi + exit $retVal diff --git a/.github/workflows/e2e-deployment-minready-1.26.yaml b/.github/workflows/e2e-deployment-minready-1.26.yaml new file mode 100644 index 00000000..9b4469db --- /dev/null +++ b/.github/workflows/e2e-deployment-minready-1.26.yaml @@ -0,0 +1,109 @@ +name: E2E-Deployment-MinReady-1.26 + +on: + push: + branches: + - master + - release-* + pull_request: {} + workflow_dispatch: {} + +# Declare default permissions as read only. +permissions: read-all + +env: + # Common versions + GO_VERSION: '1.20' + KIND_VERSION: 'v0.18.0' + KIND_IMAGE: 'kindest/node:v1.26.3' + KIND_CLUSTER_NAME: 'ci-testing' + +jobs: + + rollout: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + submodules: true + - name: Setup Go + uses: actions/setup-go@0aaccfd150d50ccaeb58ebd88d36e91967a5f35b # v5.4.0 + with: + go-version: ${{ env.GO_VERSION }} + - name: Setup Kind Cluster + uses: helm/kind-action@a1b0e391336a6ee6713a0583f8c6240d70863de3 # v1.12.0 + with: + node_image: ${{ env.KIND_IMAGE }} + cluster_name: ${{ env.KIND_CLUSTER_NAME }} + config: ./test/kind-conf.yaml + version: ${{ env.KIND_VERSION }} + - name: Build image + run: | + export IMAGE="openkruise/kruise-rollout:e2e-${GITHUB_RUN_ID}" + docker build --pull --no-cache . 
-t $IMAGE + kind load docker-image --name=${KIND_CLUSTER_NAME} $IMAGE || { echo >&2 "kind not installed or error loading image: $IMAGE"; exit 1; } + - name: Install Kruise + run: | + set -ex + kubectl cluster-info + make helm + helm repo add openkruise https://openkruise.github.io/charts/ + helm repo update + helm install kruise openkruise/kruise --version 1.7.0 + for ((i=1;i<10;i++)); + do + set +e + PODS=$(kubectl get pod -n kruise-system | grep '1/1' | grep kruise-controller-manager | wc -l) + set -e + if [ "$PODS" -eq "2" ]; then + break + fi + sleep 3 + done + set +e + PODS=$(kubectl get pod -n kruise-system | grep '1/1' | grep kruise-controller-manager | wc -l) + set -e + if [ "$PODS" -eq "2" ]; then + echo "Wait for kruise-manager ready successfully" + else + echo "Timeout to wait for kruise-manager ready" + exit 1 + fi + - name: Install Kruise Rollout + run: | + set -ex + kubectl cluster-info + IMG=openkruise/kruise-rollout:e2e-${GITHUB_RUN_ID} ./scripts/deploy_kind.sh + for ((i=1;i<10;i++)); + do + set +e + PODS=$(kubectl get pod -n kruise-rollout | grep '1/1' | wc -l) + set -e + if [ "$PODS" -eq "1" ]; then + break + fi + sleep 3 + done + set +e + PODS=$(kubectl get pod -n kruise-rollout | grep '1/1' | wc -l) + kubectl get node -o yaml + kubectl get all -n kruise-rollout -o yaml + set -e + if [ "$PODS" -eq "1" ]; then + echo "Wait for kruise-rollout ready successfully" + else + echo "Timeout to wait for kruise-rollout ready" + exit 1 + fi + - name: Run E2E Tests For Deployment MinReadySeconds + run: | + export KUBECONFIG=/home/runner/.kube/config + make ginkgo + set +e + ./bin/ginkgo -timeout 60m -v --focus='Deployment MinReadySeconds' test/e2e + retVal=$? 
+ if [ "${retVal}" -ne 0 ];then + echo "test fail, dump kruise-rollout logs" + kubectl get pod -n kruise-rollout --no-headers | grep manager | awk '{print $1}' | xargs kubectl logs -n kruise-rollout + fi + exit $retVal diff --git a/.github/workflows/e2e-deployment-minready-1.28.yaml b/.github/workflows/e2e-deployment-minready-1.28.yaml new file mode 100644 index 00000000..27914002 --- /dev/null +++ b/.github/workflows/e2e-deployment-minready-1.28.yaml @@ -0,0 +1,109 @@ +name: E2E-Deployment-MinReady-1.28 + +on: + push: + branches: + - master + - release-* + pull_request: {} + workflow_dispatch: {} + +# Declare default permissions as read only. +permissions: read-all + +env: + # Common versions + GO_VERSION: '1.20' + KIND_VERSION: 'v0.22.0' + KIND_IMAGE: 'kindest/node:v1.28.7' + KIND_CLUSTER_NAME: 'ci-testing' + +jobs: + + rollout: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + submodules: true + - name: Setup Go + uses: actions/setup-go@0aaccfd150d50ccaeb58ebd88d36e91967a5f35b # v5.4.0 + with: + go-version: ${{ env.GO_VERSION }} + - name: Setup Kind Cluster + uses: helm/kind-action@a1b0e391336a6ee6713a0583f8c6240d70863de3 # v1.12.0 + with: + node_image: ${{ env.KIND_IMAGE }} + cluster_name: ${{ env.KIND_CLUSTER_NAME }} + config: ./test/kind-conf.yaml + version: ${{ env.KIND_VERSION }} + - name: Build image + run: | + export IMAGE="openkruise/kruise-rollout:e2e-${GITHUB_RUN_ID}" + docker build --pull --no-cache . 
-t $IMAGE + kind load docker-image --name=${KIND_CLUSTER_NAME} $IMAGE || { echo >&2 "kind not installed or error loading image: $IMAGE"; exit 1; } + - name: Install Kruise + run: | + set -ex + kubectl cluster-info + make helm + helm repo add openkruise https://openkruise.github.io/charts/ + helm repo update + helm install kruise openkruise/kruise --version 1.7.0 + for ((i=1;i<10;i++)); + do + set +e + PODS=$(kubectl get pod -n kruise-system | grep '1/1' | grep kruise-controller-manager | wc -l) + set -e + if [ "$PODS" -eq "2" ]; then + break + fi + sleep 3 + done + set +e + PODS=$(kubectl get pod -n kruise-system | grep '1/1' | grep kruise-controller-manager | wc -l) + set -e + if [ "$PODS" -eq "2" ]; then + echo "Wait for kruise-manager ready successfully" + else + echo "Timeout to wait for kruise-manager ready" + exit 1 + fi + - name: Install Kruise Rollout + run: | + set -ex + kubectl cluster-info + IMG=openkruise/kruise-rollout:e2e-${GITHUB_RUN_ID} ./scripts/deploy_kind.sh + for ((i=1;i<10;i++)); + do + set +e + PODS=$(kubectl get pod -n kruise-rollout | grep '1/1' | wc -l) + set -e + if [ "$PODS" -eq "1" ]; then + break + fi + sleep 3 + done + set +e + PODS=$(kubectl get pod -n kruise-rollout | grep '1/1' | wc -l) + kubectl get node -o yaml + kubectl get all -n kruise-rollout -o yaml + set -e + if [ "$PODS" -eq "1" ]; then + echo "Wait for kruise-rollout ready successfully" + else + echo "Timeout to wait for kruise-rollout ready" + exit 1 + fi + - name: Run E2E Tests For Deployment MinReadySeconds + run: | + export KUBECONFIG=/home/runner/.kube/config + make ginkgo + set +e + ./bin/ginkgo -timeout 60m -v --focus='Deployment MinReadySeconds' test/e2e + retVal=$? 
+ if [ "${retVal}" -ne 0 ];then + echo "test fail, dump kruise-rollout logs" + kubectl get pod -n kruise-rollout --no-headers | grep manager | awk '{print $1}' | xargs kubectl logs -n kruise-rollout + fi + exit $retVal diff --git a/test/e2e/deployment_minready_actions_test.go b/test/e2e/deployment_minready_actions_test.go index 2fcf09ce..6b024efb 100644 --- a/test/e2e/deployment_minready_actions_test.go +++ b/test/e2e/deployment_minready_actions_test.go @@ -41,6 +41,13 @@ func finishMinReadyE2ERollout(namespace, name string) { waitMinReadyE2EDeploymentRestored(namespace) } +func finishMinReadyE2ERolloutWithAvailability(namespace, name string, minReadySeconds, progressDeadlineSeconds int32) { + resumeMinReadyE2ERollout(namespace, name) + resumeMinReadyE2ERollout(namespace, name) + waitMinReadyE2ERolloutPhase(namespace, name, v1beta1.RolloutPhaseHealthy) + waitMinReadyE2EDeploymentRestoredWithAvailability(namespace, minReadySeconds, progressDeadlineSeconds) +} + func waitMinReadyE2EDeploymentReady(namespace string) { Eventually(func() bool { deployment := &apps.Deployment{} @@ -75,6 +82,23 @@ func patchMinReadyE2EDeploymentReplicas(namespace string, replicas int32) { })).NotTo(HaveOccurred()) } +func updateMinReadyE2EDeploymentContinuousRelease(namespace, version string, minReadySeconds, progressDeadlineSeconds int32) { + Expect(retry.RetryOnConflict(retry.DefaultRetry, func() error { + deployment := &apps.Deployment{} + key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} + if err := k8sClient.Get(context.TODO(), key, deployment); err != nil { + return err + } + deployment.Spec.Template.Spec.Containers[0].Env = mergeEnvVar( + deployment.Spec.Template.Spec.Containers[0].Env, + corev1.EnvVar{Name: "NODE_NAME", Value: version}, + ) + deployment.Spec.MinReadySeconds = minReadySeconds + deployment.Spec.ProgressDeadlineSeconds = pointer.Int32(progressDeadlineSeconds) + return k8sClient.Update(context.TODO(), deployment) + 
})).NotTo(HaveOccurred()) +} + func patchMinReadyE2EMaxUnavailable(namespace string, value int) { Expect(retry.RetryOnConflict(retry.DefaultRetry, func() error { deployment := &apps.Deployment{} diff --git a/test/e2e/deployment_minready_scenarios_helper_test.go b/test/e2e/deployment_minready_scenarios_helper_test.go index 7c540c7a..24c6a140 100644 --- a/test/e2e/deployment_minready_scenarios_helper_test.go +++ b/test/e2e/deployment_minready_scenarios_helper_test.go @@ -43,6 +43,34 @@ func waitMinReadyE2EDeploymentReplicas(namespace string, replicas int32) { }, 5*time.Minute, time.Second).Should(BeTrue()) } +func waitMinReadyE2EOriginalAvailabilityAnnotations(namespace string, minReadySeconds, progressDeadlineSeconds int32) { + Eventually(func() bool { + deployment := &apps.Deployment{} + key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} + Expect(k8sClient.Get(context.TODO(), key, deployment)).Should(Succeed()) + return deployment.Annotations[partitiondeployment.AnnotationOriginalMinReadySeconds] == fmt.Sprintf("%d", minReadySeconds) && + deployment.Annotations[partitiondeployment.AnnotationOriginalProgressDeadlineSeconds] == fmt.Sprintf("%d", progressDeadlineSeconds) + }, 5*time.Minute, time.Second).Should(BeTrue()) +} + +func waitMinReadyE2EDeploymentRestoredWithAvailability(namespace string, minReadySeconds, progressDeadlineSeconds int32) { + Eventually(func() bool { + deployment := &apps.Deployment{} + key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} + Expect(k8sClient.Get(context.TODO(), key, deployment)).Should(Succeed()) + if deployment.Spec.ProgressDeadlineSeconds == nil || *deployment.Spec.ProgressDeadlineSeconds != progressDeadlineSeconds { + return false + } + for _, key := range partitiondeployment.AllOriginalAnnotations { + if deployment.Annotations[key] != "" { + return false + } + } + return deployment.Spec.MinReadySeconds == minReadySeconds && + deployment.Spec.Strategy.Type == 
apps.RollingUpdateDeploymentStrategyType + }, 10*time.Minute, time.Second).Should(BeTrue()) +} + func deleteMinReadyE2ERollout(namespace, name string) { rollout := &v1beta1.Rollout{} key := types.NamespacedName{Namespace: namespace, Name: name} diff --git a/test/e2e/deployment_minready_test.go b/test/e2e/deployment_minready_test.go index efd6ce39..2d2dc6f2 100644 --- a/test/e2e/deployment_minready_test.go +++ b/test/e2e/deployment_minready_test.go @@ -70,7 +70,19 @@ var _ = SIGDescribe("Deployment MinReadySeconds", func() { expectMinReadyE2EDeploymentVersion(namespace, "version1") }) - It("TC3 controller restart resumes from the persisted MinReadySeconds state", func() { + It("TC3 continuous release rolls v1 to v2 to v3 and refreshes original availability fields", func() { + rollout := startMinReadyE2ERollout(namespace) + waitMinReadyE2ERolloutStepPaused(namespace, rollout.Name, 1) + + updateMinReadyE2EDeploymentContinuousRelease(namespace, "version3", 9, 90) + waitMinReadyE2EDeploymentInflated(namespace) + waitMinReadyE2EOriginalAvailabilityAnnotations(namespace, 9, 90) + expectMinReadyE2EDeploymentVersion(namespace, "version3") + + finishMinReadyE2ERolloutWithAvailability(namespace, rollout.Name, 9, 90) + }) + + It("TC4 controller restart resumes from the persisted MinReadySeconds state", func() { rollout := startMinReadyE2ERollout(namespace) waitMinReadyE2ERolloutStepPaused(namespace, rollout.Name, 1) restartMinReadyE2EControllerManager() @@ -80,7 +92,7 @@ var _ = SIGDescribe("Deployment MinReadySeconds", func() { finishMinReadyE2ERollout(namespace, rollout.Name) }) - It("TC4 scale changes remain safe while rollout is active", func() { + It("TC5 scale changes remain safe while rollout is active", func() { rollout := makeMinReadyE2ERolloutWithReplicas(namespace, "25%", "50%", "100%") deployment := newMinReadyE2EDeployment(namespace) setMinReadyE2EInitialReplicas(deployment, 4) @@ -100,7 +112,7 @@ var _ = SIGDescribe("Deployment MinReadySeconds", func() { 
finishMinReadyE2ERollout(namespace, rollout.Name) }) - It("TC5 deleting Rollout restores annotations and lets native RollingUpdate continue", func() { + It("TC6 deleting Rollout restores annotations and lets native RollingUpdate continue", func() { rollout := startMinReadyE2ERollout(namespace) deleteMinReadyE2ERollout(namespace, rollout.Name) @@ -109,7 +121,7 @@ var _ = SIGDescribe("Deployment MinReadySeconds", func() { expectMinReadyE2EOriginalAnnotationAbsent(namespace) }) - It("TC6 GitOps drift records degraded status and preserves the external value", func() { + It("TC7 GitOps drift records degraded status and preserves the external value", func() { rollout := startMinReadyE2ERollout(namespace) waitMinReadyE2ERolloutStepPaused(namespace, rollout.Name, 1) patchMinReadyE2EMaxUnavailable(namespace, 5) @@ -120,7 +132,7 @@ var _ = SIGDescribe("Deployment MinReadySeconds", func() { expectMinReadyE2EInflatedMaxUnavailable(namespace, 5) }) - It("TC7 missing annotation blocks finalize until the operator restores it", func() { + It("TC8 missing annotation blocks finalize until the operator restores it", func() { rollout := startMinReadyE2ERollout(namespace) deleteMinReadyE2EOriginalAnnotation(namespace, partitiondeployment.AnnotationOriginalMaxUnavailable) resumeMinReadyE2ERollout(namespace, rollout.Name) From 9079581b189a664f3dbe0d06907fe5520e0f84d3 Mon Sep 17 00:00:00 2001 From: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> Date: Sun, 14 Jun 2026 21:26:23 +0800 Subject: [PATCH 11/22] ci: enable minready e2e feature gate Signed-off-by: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> --- .github/workflows/e2e-deployment-minready-1.24.yaml | 4 ++++ .github/workflows/e2e-deployment-minready-1.26.yaml | 4 ++++ .github/workflows/e2e-deployment-minready-1.28.yaml | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/.github/workflows/e2e-deployment-minready-1.24.yaml b/.github/workflows/e2e-deployment-minready-1.24.yaml index 2f5730b5..7e58fcac 100644 
--- a/.github/workflows/e2e-deployment-minready-1.24.yaml +++ b/.github/workflows/e2e-deployment-minready-1.24.yaml @@ -74,6 +74,10 @@ jobs: set -ex kubectl cluster-info IMG=openkruise/kruise-rollout:e2e-${GITHUB_RUN_ID} ./scripts/deploy_kind.sh + # Enable MinReadySecondsStrategy feature gate. + kubectl patch deployment kruise-rollout-controller-manager -n kruise-rollout --type='json' \ + -p='[{"op": "replace", "path": "/spec/template/spec/containers/0/args/1", "value": "--feature-gates=AdvancedDeployment=true,MinReadySecondsStrategy=true"}]' + kubectl rollout status deployment/kruise-rollout-controller-manager -n kruise-rollout --timeout=120s for ((i=1;i<10;i++)); do set +e diff --git a/.github/workflows/e2e-deployment-minready-1.26.yaml b/.github/workflows/e2e-deployment-minready-1.26.yaml index 9b4469db..7be2c74c 100644 --- a/.github/workflows/e2e-deployment-minready-1.26.yaml +++ b/.github/workflows/e2e-deployment-minready-1.26.yaml @@ -74,6 +74,10 @@ jobs: set -ex kubectl cluster-info IMG=openkruise/kruise-rollout:e2e-${GITHUB_RUN_ID} ./scripts/deploy_kind.sh + # Enable MinReadySecondsStrategy feature gate. + kubectl patch deployment kruise-rollout-controller-manager -n kruise-rollout --type='json' \ + -p='[{"op": "replace", "path": "/spec/template/spec/containers/0/args/1", "value": "--feature-gates=AdvancedDeployment=true,MinReadySecondsStrategy=true"}]' + kubectl rollout status deployment/kruise-rollout-controller-manager -n kruise-rollout --timeout=120s for ((i=1;i<10;i++)); do set +e diff --git a/.github/workflows/e2e-deployment-minready-1.28.yaml b/.github/workflows/e2e-deployment-minready-1.28.yaml index 27914002..d3e23974 100644 --- a/.github/workflows/e2e-deployment-minready-1.28.yaml +++ b/.github/workflows/e2e-deployment-minready-1.28.yaml @@ -74,6 +74,10 @@ jobs: set -ex kubectl cluster-info IMG=openkruise/kruise-rollout:e2e-${GITHUB_RUN_ID} ./scripts/deploy_kind.sh + # Enable MinReadySecondsStrategy feature gate. 
+ kubectl patch deployment kruise-rollout-controller-manager -n kruise-rollout --type='json' \ + -p='[{"op": "replace", "path": "/spec/template/spec/containers/0/args/1", "value": "--feature-gates=AdvancedDeployment=true,MinReadySecondsStrategy=true"}]' + kubectl rollout status deployment/kruise-rollout-controller-manager -n kruise-rollout --timeout=120s for ((i=1;i<10;i++)); do set +e From 590470c1e628e3d921485cda1965f34238bb1ad6 Mon Sep 17 00:00:00 2001 From: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> Date: Mon, 15 Jun 2026 09:49:41 +0800 Subject: [PATCH 12/22] test: stabilize minready e2e setup Signed-off-by: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> --- test/e2e/deployment_minready_helpers_test.go | 16 ++++++++++++---- test/e2e/deployment_minready_pdb_test.go | 6 ++---- ...deployment_minready_scenarios_helper_test.go | 3 ++- test/e2e/deployment_minready_test.go | 17 ++++++----------- 4 files changed, 22 insertions(+), 20 deletions(-) diff --git a/test/e2e/deployment_minready_helpers_test.go b/test/e2e/deployment_minready_helpers_test.go index bb10f534..8be80225 100644 --- a/test/e2e/deployment_minready_helpers_test.go +++ b/test/e2e/deployment_minready_helpers_test.go @@ -108,6 +108,16 @@ func createMinReadyE2EObject(object client.Object) { Expect(k8sClient.Create(context.TODO(), object)).NotTo(HaveOccurred()) } +func createReadyMinReadyE2EDeployment(namespace string, deployment *apps.Deployment) { + createMinReadyE2EObject(deployment) + waitMinReadyE2EDeploymentReady(namespace) +} + +func createHealthyMinReadyE2ERollout(namespace string, rollout *v1beta1.Rollout) { + createMinReadyE2EObject(rollout) + waitMinReadyE2ERolloutPhase(namespace, rollout.Name, v1beta1.RolloutPhaseHealthy) +} + func updateMinReadyE2EDeploymentVersion(namespace, version string) { Expect(retry.RetryOnConflict(retry.DefaultRetry, func() error { deployment := &apps.Deployment{} @@ -208,10 +218,8 @@ func waitMinReadyE2EBatchCondition(namespace, name, reason 
string) { func startMinReadyE2ERollout(namespace string) *v1beta1.Rollout { rollout := newMinReadyE2ERollout(namespace) deployment := newMinReadyE2EDeployment(namespace) - createMinReadyE2EObject(rollout) - createMinReadyE2EObject(deployment) - waitMinReadyE2EDeploymentReady(namespace) - waitMinReadyE2ERolloutPhase(namespace, rollout.Name, v1beta1.RolloutPhaseHealthy) + createReadyMinReadyE2EDeployment(namespace, deployment) + createHealthyMinReadyE2ERollout(namespace, rollout) updateMinReadyE2EDeploymentVersion(namespace, "version2") waitMinReadyE2EDeploymentInflated(namespace) waitMinReadyE2EBatchCondition(namespace, rollout.Name, "MinReadyInitialized") diff --git a/test/e2e/deployment_minready_pdb_test.go b/test/e2e/deployment_minready_pdb_test.go index 4e0dbefb..4a11e128 100644 --- a/test/e2e/deployment_minready_pdb_test.go +++ b/test/e2e/deployment_minready_pdb_test.go @@ -55,12 +55,10 @@ var _ = SIGDescribe("Deployment MinReadySeconds PDB", func() { rollout := newMinReadyE2ERollout(namespace) deployment := newMinReadyE2EDeployment(namespace) pdb := newMinReadyE2EPDB(namespace) - createMinReadyE2EObject(rollout) - createMinReadyE2EObject(deployment) + createReadyMinReadyE2EDeployment(namespace, deployment) createMinReadyE2EObject(pdb) - waitMinReadyE2EDeploymentReady(namespace) - waitMinReadyE2ERolloutPhase(namespace, rollout.Name, v1beta1.RolloutPhaseHealthy) + createHealthyMinReadyE2ERollout(namespace, rollout) updateMinReadyE2EDeploymentVersion(namespace, "version2") waitMinReadyE2EBatchCondition(namespace, rollout.Name, "MinReadyInitialized") diff --git a/test/e2e/deployment_minready_scenarios_helper_test.go b/test/e2e/deployment_minready_scenarios_helper_test.go index 24c6a140..5f18801c 100644 --- a/test/e2e/deployment_minready_scenarios_helper_test.go +++ b/test/e2e/deployment_minready_scenarios_helper_test.go @@ -39,7 +39,8 @@ func waitMinReadyE2EDeploymentReplicas(namespace string, replicas int32) { deployment := &apps.Deployment{} key := 
types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} Expect(k8sClient.Get(context.TODO(), key, deployment)).Should(Succeed()) - return deployment.Status.Replicas == replicas && *deployment.Spec.Replicas == replicas + return *deployment.Spec.Replicas == replicas && + deployment.Status.ObservedGeneration >= deployment.Generation }, 5*time.Minute, time.Second).Should(BeTrue()) } diff --git a/test/e2e/deployment_minready_test.go b/test/e2e/deployment_minready_test.go index 2d2dc6f2..d3004ab4 100644 --- a/test/e2e/deployment_minready_test.go +++ b/test/e2e/deployment_minready_test.go @@ -57,10 +57,8 @@ var _ = SIGDescribe("Deployment MinReadySeconds", func() { It("TC2 rollback returns to the stable template", func() { rollout := newMinReadyE2ERollout(namespace) deployment := newMinReadyE2EDeployment(namespace) - createMinReadyE2EObject(rollout) - createMinReadyE2EObject(deployment) - waitMinReadyE2EDeploymentReady(namespace) - waitMinReadyE2ERolloutPhase(namespace, rollout.Name, v1beta1.RolloutPhaseHealthy) + createReadyMinReadyE2EDeployment(namespace, deployment) + createHealthyMinReadyE2ERollout(namespace, rollout) updateMinReadyE2EDeploymentVersion(namespace, "version2") waitMinReadyE2EDeploymentInflated(namespace) @@ -96,10 +94,8 @@ var _ = SIGDescribe("Deployment MinReadySeconds", func() { rollout := makeMinReadyE2ERolloutWithReplicas(namespace, "25%", "50%", "100%") deployment := newMinReadyE2EDeployment(namespace) setMinReadyE2EInitialReplicas(deployment, 4) - createMinReadyE2EObject(rollout) - createMinReadyE2EObject(deployment) - waitMinReadyE2EDeploymentReady(namespace) - waitMinReadyE2ERolloutPhase(namespace, rollout.Name, v1beta1.RolloutPhaseHealthy) + createReadyMinReadyE2EDeployment(namespace, deployment) + createHealthyMinReadyE2ERollout(namespace, rollout) updateMinReadyE2EDeploymentVersion(namespace, "version2") waitMinReadyE2EDeploymentInflated(namespace) @@ -134,15 +130,14 @@ var _ = SIGDescribe("Deployment MinReadySeconds", 
func() { It("TC8 missing annotation blocks finalize until the operator restores it", func() { rollout := startMinReadyE2ERollout(namespace) + waitMinReadyE2ERolloutStepPaused(namespace, rollout.Name, 1) deleteMinReadyE2EOriginalAnnotation(namespace, partitiondeployment.AnnotationOriginalMaxUnavailable) resumeMinReadyE2ERollout(namespace, rollout.Name) - resumeMinReadyE2ERollout(namespace, rollout.Name) waitMinReadyE2EBatchCondition(namespace, rollout.Name, "MinReadyDegradedMissingAnnotations") waitMinReadyE2EEventReason(namespace, "MinReadyDegradedMissingAnnotations") restoreMinReadyE2EOriginalAnnotation(namespace, partitiondeployment.AnnotationOriginalMaxUnavailable, "25%") - waitMinReadyE2ERolloutPhase(namespace, rollout.Name, v1beta1.RolloutPhaseHealthy) - waitMinReadyE2EDeploymentRestored(namespace) + finishMinReadyE2ERollout(namespace, rollout.Name) }) }) }) From b8248347e548577905c4055b183d8d2fc198f652 Mon Sep 17 00:00:00 2001 From: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> Date: Mon, 15 Jun 2026 20:22:12 +0800 Subject: [PATCH 13/22] test: stabilize deployment minready e2e Signed-off-by: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> --- .../deployment/minready_control.go | 7 +-- test/e2e/deployment_minready_actions_test.go | 8 +-- test/e2e/deployment_minready_helpers_test.go | 55 +++++++++++++++++-- test/e2e/deployment_minready_test.go | 9 +-- 4 files changed, 60 insertions(+), 19 deletions(-) diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go index 128d959c..f8ff720b 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go @@ -101,11 +101,8 @@ func (mc *MinReadyControl) UpgradeBatch(ctx context.Context, batchContext *batch // maxUnavailable above the batch target is a legal state 
after a // scale-down (HPA or manual) and also self-heals external tampering; // converge it back to the target instead of reporting degraded drift. - klog.InfoS("MinReady maxUnavailable exceeds target, reducing", - "batch", batchContext.CurrentBatch, - "deployment", klog.KObj(mc.object), - "maxUnavailable", current, - "target", target) + klog.Warningf("MinReady maxUnavailable exceeds target, reducing, batch=%d deployment=%s maxUnavailable=%d target=%d", + batchContext.CurrentBatch, klog.KObj(mc.object), current, target) } original := mc.object modified := mc.object.DeepCopy() diff --git a/test/e2e/deployment_minready_actions_test.go b/test/e2e/deployment_minready_actions_test.go index 6b024efb..e4975b72 100644 --- a/test/e2e/deployment_minready_actions_test.go +++ b/test/e2e/deployment_minready_actions_test.go @@ -35,16 +35,12 @@ import ( ) func finishMinReadyE2ERollout(namespace, name string) { - resumeMinReadyE2ERollout(namespace, name) - resumeMinReadyE2ERollout(namespace, name) - waitMinReadyE2ERolloutPhase(namespace, name, v1beta1.RolloutPhaseHealthy) + completeMinReadyE2ERollout(namespace, name) waitMinReadyE2EDeploymentRestored(namespace) } func finishMinReadyE2ERolloutWithAvailability(namespace, name string, minReadySeconds, progressDeadlineSeconds int32) { - resumeMinReadyE2ERollout(namespace, name) - resumeMinReadyE2ERollout(namespace, name) - waitMinReadyE2ERolloutPhase(namespace, name, v1beta1.RolloutPhaseHealthy) + completeMinReadyE2ERollout(namespace, name) waitMinReadyE2EDeploymentRestoredWithAvailability(namespace, minReadySeconds, progressDeadlineSeconds) } diff --git a/test/e2e/deployment_minready_helpers_test.go b/test/e2e/deployment_minready_helpers_test.go index 8be80225..3cab6115 100644 --- a/test/e2e/deployment_minready_helpers_test.go +++ b/test/e2e/deployment_minready_helpers_test.go @@ -165,13 +165,60 @@ func resumeMinReadyE2ERollout(namespace, name string) { }, 2*time.Minute, time.Second).Should(BeTrue()) } +func 
completeMinReadyE2ERollout(namespace, name string) { + Eventually(func() string { + rollout := &v1beta1.Rollout{} + key := types.NamespacedName{Namespace: namespace, Name: name} + if err := k8sClient.Get(context.TODO(), key, rollout); err != nil { + return fmt.Sprintf("get rollout failed: %v", err) + } + status := minReadyE2ERolloutStatus(rollout) + if rollout.Status.Phase == v1beta1.RolloutPhaseHealthy { + return status + } + if rollout.Status.CanaryStatus == nil || + rollout.Status.CanaryStatus.CurrentStepState != v1beta1.CanaryStepStatePaused { + return status + } + body := fmt.Sprintf(`{"status":{"canaryStatus":{"currentStepState":"%s"}}}`, v1beta1.CanaryStepStateReady) + if err := k8sClient.Status().Patch(context.TODO(), rollout, client.RawPatch(types.MergePatchType, []byte(body))); err != nil { + return fmt.Sprintf("%s patch ready failed: %v", status, err) + } + return status + }, 10*time.Minute, time.Second).Should(HavePrefix(fmt.Sprintf("phase=%s", v1beta1.RolloutPhaseHealthy))) +} + func waitMinReadyE2ERolloutPhase(namespace, name string, phase v1beta1.RolloutPhase) { - Eventually(func() bool { + Eventually(func() string { rollout := &v1beta1.Rollout{} key := types.NamespacedName{Namespace: namespace, Name: name} - Expect(k8sClient.Get(context.TODO(), key, rollout)).NotTo(HaveOccurred()) - return rollout.Status.Phase == phase - }, 10*time.Minute, time.Second).Should(BeTrue()) + if err := k8sClient.Get(context.TODO(), key, rollout); err != nil { + return fmt.Sprintf("get rollout failed: %v", err) + } + return minReadyE2ERolloutStatus(rollout) + }, 10*time.Minute, time.Second).Should(HavePrefix(fmt.Sprintf("phase=%s", phase))) +} + +func minReadyE2ERolloutStatus(rollout *v1beta1.Rollout) string { + status := fmt.Sprintf( + "phase=%s rolloutStep=%d rolloutState=%s message=%q", + rollout.Status.Phase, + rollout.Status.CurrentStepIndex, + rollout.Status.CurrentStepState, + rollout.Status.Message, + ) + if rollout.Status.CanaryStatus == nil { + return status + " 
canary=" + } + canary := rollout.Status.CanaryStatus + return fmt.Sprintf( + "%s canaryStep=%d nextStep=%d canaryState=%s canaryMessage=%q", + status, + canary.CurrentStepIndex, + canary.NextStepIndex, + canary.CurrentStepState, + canary.Message, + ) } func waitMinReadyE2EDeploymentInflated(namespace string) { diff --git a/test/e2e/deployment_minready_test.go b/test/e2e/deployment_minready_test.go index d3004ab4..49b6ab2e 100644 --- a/test/e2e/deployment_minready_test.go +++ b/test/e2e/deployment_minready_test.go @@ -62,8 +62,10 @@ var _ = SIGDescribe("Deployment MinReadySeconds", func() { updateMinReadyE2EDeploymentVersion(namespace, "version2") waitMinReadyE2EDeploymentInflated(namespace) + waitMinReadyE2ERolloutStepPaused(namespace, rollout.Name, 1) updateMinReadyE2EDeploymentVersion(namespace, "version1") waitMinReadyE2ERolloutPhase(namespace, rollout.Name, v1beta1.RolloutPhaseHealthy) + waitMinReadyE2EDeploymentRestored(namespace) expectMinReadyE2EDeploymentVersion(namespace, "version1") }) @@ -117,15 +119,14 @@ var _ = SIGDescribe("Deployment MinReadySeconds", func() { expectMinReadyE2EOriginalAnnotationAbsent(namespace) }) - It("TC7 GitOps drift records degraded status and preserves the external value", func() { + It("TC7 external maxUnavailable drift converges to the current batch target", func() { rollout := startMinReadyE2ERollout(namespace) waitMinReadyE2ERolloutStepPaused(namespace, rollout.Name, 1) patchMinReadyE2EMaxUnavailable(namespace, 5) resumeMinReadyE2ERollout(namespace, rollout.Name) - waitMinReadyE2EBatchMetricCondition(namespace, rollout.Name, "MinReadyDegradedDriftDetected") - waitMinReadyE2EEventReason(namespace, "MinReadyDegradedDriftDetected") - expectMinReadyE2EInflatedMaxUnavailable(namespace, 5) + expectMinReadyE2EInflatedMaxUnavailable(namespace, 3) + finishMinReadyE2ERollout(namespace, rollout.Name) }) It("TC8 missing annotation blocks finalize until the operator restores it", func() { From 1e7fa9797349079704c6e6d80446e2ceab3382af 
Mon Sep 17 00:00:00 2001 From: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> Date: Mon, 15 Jun 2026 21:35:08 +0800 Subject: [PATCH 14/22] test: fix race condition in minready e2e TC7 Signed-off-by: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> --- test/e2e/deployment_minready_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/test/e2e/deployment_minready_test.go b/test/e2e/deployment_minready_test.go index 49b6ab2e..6704d083 100644 --- a/test/e2e/deployment_minready_test.go +++ b/test/e2e/deployment_minready_test.go @@ -123,7 +123,6 @@ var _ = SIGDescribe("Deployment MinReadySeconds", func() { rollout := startMinReadyE2ERollout(namespace) waitMinReadyE2ERolloutStepPaused(namespace, rollout.Name, 1) patchMinReadyE2EMaxUnavailable(namespace, 5) - resumeMinReadyE2ERollout(namespace, rollout.Name) expectMinReadyE2EInflatedMaxUnavailable(namespace, 3) finishMinReadyE2ERollout(namespace, rollout.Name) From c762f82aea2f646b72ea85998486e2780d82c5bd Mon Sep 17 00:00:00 2001 From: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> Date: Mon, 15 Jun 2026 22:01:44 +0800 Subject: [PATCH 15/22] test: fix TC7 race and organize minready e2e sub-package Use a 4-step rollout (60% keeps target=3) so TC7 resume no longer jumps past the convergence assertion. Move MinReady E2E into test/e2e/minready, update CI workflows, and emit structured warning logs for maxUnavailable convergence. 
Signed-off-by: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> Co-authored-by: Cursor --- .../e2e-deployment-minready-1.24.yaml | 2 +- .../e2e-deployment-minready-1.26.yaml | 2 +- .../e2e-deployment-minready-1.28.yaml | 2 +- .../deployment/minready_control.go | 18 ++- .../deployment_minready_actions_test.go | 13 +- .../deployment_minready_helpers_test.go | 2 +- .../deployment_minready_pdb_test.go | 2 +- ...ployment_minready_scenarios_helper_test.go | 2 +- .../deployment_minready_test.go | 13 +- test/e2e/minready/suite_test.go | 111 ++++++++++++++++++ 10 files changed, 156 insertions(+), 11 deletions(-) rename test/e2e/{ => minready}/deployment_minready_actions_test.go (95%) rename test/e2e/{ => minready}/deployment_minready_helpers_test.go (99%) rename test/e2e/{ => minready}/deployment_minready_pdb_test.go (99%) rename test/e2e/{ => minready}/deployment_minready_scenarios_helper_test.go (99%) rename test/e2e/{ => minready}/deployment_minready_test.go (90%) create mode 100644 test/e2e/minready/suite_test.go diff --git a/.github/workflows/e2e-deployment-minready-1.24.yaml b/.github/workflows/e2e-deployment-minready-1.24.yaml index 7e58fcac..a1b746f7 100644 --- a/.github/workflows/e2e-deployment-minready-1.24.yaml +++ b/.github/workflows/e2e-deployment-minready-1.24.yaml @@ -104,7 +104,7 @@ jobs: export KUBECONFIG=/home/runner/.kube/config make ginkgo set +e - ./bin/ginkgo -timeout 60m -v --focus='Deployment MinReadySeconds' test/e2e + ./bin/ginkgo -timeout 60m -v test/e2e/minready retVal=$? 
if [ "${retVal}" -ne 0 ];then echo "test fail, dump kruise-rollout logs" diff --git a/.github/workflows/e2e-deployment-minready-1.26.yaml b/.github/workflows/e2e-deployment-minready-1.26.yaml index 7be2c74c..cb3e8e6d 100644 --- a/.github/workflows/e2e-deployment-minready-1.26.yaml +++ b/.github/workflows/e2e-deployment-minready-1.26.yaml @@ -104,7 +104,7 @@ jobs: export KUBECONFIG=/home/runner/.kube/config make ginkgo set +e - ./bin/ginkgo -timeout 60m -v --focus='Deployment MinReadySeconds' test/e2e + ./bin/ginkgo -timeout 60m -v test/e2e/minready retVal=$? if [ "${retVal}" -ne 0 ];then echo "test fail, dump kruise-rollout logs" diff --git a/.github/workflows/e2e-deployment-minready-1.28.yaml b/.github/workflows/e2e-deployment-minready-1.28.yaml index d3e23974..33ffca81 100644 --- a/.github/workflows/e2e-deployment-minready-1.28.yaml +++ b/.github/workflows/e2e-deployment-minready-1.28.yaml @@ -104,7 +104,7 @@ jobs: export KUBECONFIG=/home/runner/.kube/config make ginkgo set +e - ./bin/ginkgo -timeout 60m -v --focus='Deployment MinReadySeconds' test/e2e + ./bin/ginkgo -timeout 60m -v test/e2e/minready retVal=$? 
if [ "${retVal}" -ne 0 ];then echo "test fail, dump kruise-rollout logs" diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go index f8ff720b..686a63f3 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go @@ -19,6 +19,7 @@ package deployment import ( "context" "fmt" + "strings" apps "k8s.io/api/apps/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -101,8 +102,9 @@ func (mc *MinReadyControl) UpgradeBatch(ctx context.Context, batchContext *batch // maxUnavailable above the batch target is a legal state after a // scale-down (HPA or manual) and also self-heals external tampering; // converge it back to the target instead of reporting degraded drift. - klog.Warningf("MinReady maxUnavailable exceeds target, reducing, batch=%d deployment=%s maxUnavailable=%d target=%d", - batchContext.CurrentBatch, klog.KObj(mc.object), current, target) + klog.Warning(minReadyWarningS("MinReady maxUnavailable exceeds target, reducing", + "batch", batchContext.CurrentBatch, "deployment", klog.KObj(mc.object), + "maxUnavailable", current, "target", target)) } original := mc.object modified := mc.object.DeepCopy() @@ -380,6 +382,18 @@ func applyOriginalDeploymentStrategy(deployment *apps.Deployment, original *orig // EventDegradedDriftDetected is the warning event reason recorded when // external drift of the inflated fields is detected. It equals the sentinel // error text so events, metrics and errors.Is classification stay in sync. +// minReadyWarningS formats structured key-value pairs for warning logs. The +// project's klog v2.120.1 does not expose WarningS; keep the same call shape +// used elsewhere (ErrorS/InfoS) while emitting at warning severity. 
+func minReadyWarningS(msg string, keysAndValues ...interface{}) string { + var b strings.Builder + b.WriteString(msg) + for i := 0; i+1 < len(keysAndValues); i += 2 { + fmt.Fprintf(&b, " %v=%v", keysAndValues[i], keysAndValues[i+1]) + } + return b.String() +} + var EventDegradedDriftDetected = partitionstyle.ErrMinReadyDriftDetected.Error() var _ partitionstyle.Interface = (*MinReadyControl)(nil) diff --git a/test/e2e/deployment_minready_actions_test.go b/test/e2e/minready/deployment_minready_actions_test.go similarity index 95% rename from test/e2e/deployment_minready_actions_test.go rename to test/e2e/minready/deployment_minready_actions_test.go index e4975b72..48965eac 100644 --- a/test/e2e/deployment_minready_actions_test.go +++ b/test/e2e/minready/deployment_minready_actions_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package e2e +package minready import ( "context" @@ -34,6 +34,17 @@ import ( "github.com/openkruise/rollouts/api/v1beta1" ) +func mergeEnvVar(original []corev1.EnvVar, add corev1.EnvVar) []corev1.EnvVar { + newEnvs := make([]corev1.EnvVar, 0, len(original)+1) + for _, env := range original { + if add.Name == env.Name { + continue + } + newEnvs = append(newEnvs, env) + } + return append(newEnvs, add) +} + func finishMinReadyE2ERollout(namespace, name string) { completeMinReadyE2ERollout(namespace, name) waitMinReadyE2EDeploymentRestored(namespace) diff --git a/test/e2e/deployment_minready_helpers_test.go b/test/e2e/minready/deployment_minready_helpers_test.go similarity index 99% rename from test/e2e/deployment_minready_helpers_test.go rename to test/e2e/minready/deployment_minready_helpers_test.go index 3cab6115..06996624 100644 --- a/test/e2e/deployment_minready_helpers_test.go +++ b/test/e2e/minready/deployment_minready_helpers_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package e2e +package minready import ( "context" diff --git a/test/e2e/deployment_minready_pdb_test.go b/test/e2e/minready/deployment_minready_pdb_test.go similarity index 99% rename from test/e2e/deployment_minready_pdb_test.go rename to test/e2e/minready/deployment_minready_pdb_test.go index 4a11e128..78ceb57d 100644 --- a/test/e2e/deployment_minready_pdb_test.go +++ b/test/e2e/minready/deployment_minready_pdb_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package e2e +package minready import ( "context" diff --git a/test/e2e/deployment_minready_scenarios_helper_test.go b/test/e2e/minready/deployment_minready_scenarios_helper_test.go similarity index 99% rename from test/e2e/deployment_minready_scenarios_helper_test.go rename to test/e2e/minready/deployment_minready_scenarios_helper_test.go index 5f18801c..68b574c8 100644 --- a/test/e2e/deployment_minready_scenarios_helper_test.go +++ b/test/e2e/minready/deployment_minready_scenarios_helper_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package e2e +package minready import ( "context" diff --git a/test/e2e/deployment_minready_test.go b/test/e2e/minready/deployment_minready_test.go similarity index 90% rename from test/e2e/deployment_minready_test.go rename to test/e2e/minready/deployment_minready_test.go index 6704d083..696e9f62 100644 --- a/test/e2e/deployment_minready_test.go +++ b/test/e2e/minready/deployment_minready_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package e2e +package minready import ( "context" @@ -120,9 +120,18 @@ var _ = SIGDescribe("Deployment MinReadySeconds", func() { }) It("TC7 external maxUnavailable drift converges to the current batch target", func() { - rollout := startMinReadyE2ERollout(namespace) + // 4 steps: 60% keeps target=3 on 5 replicas so resume does not jump to target=5. + rollout := makeMinReadyE2ERolloutWithReplicas(namespace, "20%", "50%", "60%", "100%") + deployment := newMinReadyE2EDeployment(namespace) + createReadyMinReadyE2EDeployment(namespace, deployment) + createHealthyMinReadyE2ERollout(namespace, rollout) + updateMinReadyE2EDeploymentVersion(namespace, "version2") + waitMinReadyE2EDeploymentInflated(namespace) + waitMinReadyE2EBatchCondition(namespace, rollout.Name, "MinReadyInitialized") + waitMinReadyE2ERolloutStepPaused(namespace, rollout.Name, 1) patchMinReadyE2EMaxUnavailable(namespace, 5) + resumeMinReadyE2ERollout(namespace, rollout.Name) expectMinReadyE2EInflatedMaxUnavailable(namespace, 3) finishMinReadyE2ERollout(namespace, rollout.Name) diff --git a/test/e2e/minready/suite_test.go b/test/e2e/minready/suite_test.go new file mode 100644 index 00000000..f9e5c14f --- /dev/null +++ b/test/e2e/minready/suite_test.go @@ -0,0 +1,111 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package minready + +import ( + "context" + "fmt" + "math/rand" + "os" + "path/filepath" + "strconv" + "testing" + "time" + + . "github.com/onsi/ginkgo" + . 
"github.com/onsi/gomega" + kruisev1alpha1 "github.com/openkruise/kruise-api/apps/v1alpha1" + kruisev1beta1 "github.com/openkruise/kruise-api/apps/v1beta1" + crdv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/config" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + gatewayv1beta1 "sigs.k8s.io/gateway-api/apis/v1beta1" + "sigs.k8s.io/yaml" + + rolloutapi "github.com/openkruise/rollouts/api" +) + +var k8sClient client.Client +var scheme = runtime.NewScheme() + +func TestMinReadyE2E(t *testing.T) { + RegisterFailHandler(Fail) + + RunSpecsWithDefaultAndCustomReporters(t, + "Deployment MinReadySeconds E2E Suite", []Reporter{}) +} + +var _ = BeforeSuite(func(done Done) { + By("Bootstrapping MinReady test environment") + rand.Seed(time.Now().UnixNano()) + logf.SetLogger(zap.New(zap.UseDevMode(true), zap.WriteTo(GinkgoWriter))) + err := clientgoscheme.AddToScheme(scheme) + Expect(err).Should(BeNil()) + err = rolloutapi.AddToScheme(scheme) + Expect(err).Should(BeNil()) + err = crdv1.AddToScheme(scheme) + Expect(err).Should(BeNil()) + err = kruisev1beta1.AddToScheme(scheme) + Expect(err).Should(BeNil()) + err = kruisev1alpha1.AddToScheme(scheme) + Expect(err).Should(BeNil()) + err = gatewayv1beta1.AddToScheme(scheme) + Expect(err).Should(BeNil()) + By("Setting up kubernetes client") + k8sClient, err = client.New(config.GetConfigOrDie(), client.Options{Scheme: scheme}) + if err != nil { + logf.Log.Error(err, "failed to create k8sClient") + Fail("setup failed") + } + By("Create the CRDs") + var httprouteCRD crdv1.CustomResourceDefinition + err = readYamlToObject("../test_data/crds/httproutes.yaml", &httprouteCRD) + Expect(err).Should(BeNil()) + err = k8sClient.Create(context.TODO(), &httprouteCRD) + 
if errors.IsAlreadyExists(err) { + err = nil + } + Expect(err).Should(BeNil()) + + close(done) + By("Finished setting up MinReady test environment") +}, 300) + +func readYamlToObject(path string, object runtime.Object) error { + data, err := os.ReadFile(filepath.Clean(path)) + if err != nil { + return err + } + return yaml.Unmarshal(data, object) +} + +func randomNamespaceName(basic string) string { + return fmt.Sprintf("%s-%s", basic, strconv.FormatInt(rand.Int63(), 16)) +} + +func SIGDescribe(text string, body func()) bool { + return Describe("[rollouts] "+text, body) +} + +func KruiseDescribe(text string, body func()) bool { + return Describe("[kruise.io] "+text, body) +} From 79a05d9690b85185cbf4f5245d4b6466e20ba08d Mon Sep 17 00:00:00 2001 From: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> Date: Mon, 15 Jun 2026 23:41:26 +0800 Subject: [PATCH 16/22] fix: address MinReady review follow-ups on status and annotations Move MinReady status/metrics recording into MinReadyControl lifecycle methods, simplify control_plane to generic logging for non-MinReady paths, add structured warningS logging, and consolidate original annotation prepare/enroll helpers per review feedback. 
Signed-off-by: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> Co-authored-by: Cursor --- .../control/partitionstyle/control_plane.go | 72 +++---- .../partitionstyle/control_plane_test.go | 87 +++++++- .../deployment/minready_control.go | 189 +++++++++++++----- .../partitionstyle/deployment/minready_log.go | 42 ++++ .../control/partitionstyle/minready_status.go | 112 +++++++---- .../partitionstyle/minready_status_test.go | 24 +-- 6 files changed, 385 insertions(+), 141 deletions(-) create mode 100644 pkg/controller/batchrelease/control/partitionstyle/deployment/minready_log.go diff --git a/pkg/controller/batchrelease/control/partitionstyle/control_plane.go b/pkg/controller/batchrelease/control/partitionstyle/control_plane.go index f341fd6c..74a00c04 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/control_plane.go +++ b/pkg/controller/batchrelease/control/partitionstyle/control_plane.go @@ -68,24 +68,36 @@ func nonNilContext(ctx context.Context) context.Context { return context.Background() } +func (rc *realBatchControlPlane) bindMinReadyStatus(controller Interface) { + if binder, ok := controller.(MinReadyStatusBinder); ok { + binder.BindMinReadyStatus(rc.release, rc.newStatus, rc.EventRecorder) + } +} + +func (rc *realBatchControlPlane) reportOperationFailed(controller Interface, reason string, err error) { + if err == nil { + return + } + if lifecycle, ok := controller.(MinReadyLifecycle); ok { + lifecycle.RecordOperationFailed(reason, err) + return + } + klog.ErrorS(err, "Partition-style control plane failed", "release", klog.KObj(rc.release), "reason", reason) +} + func (rc *realBatchControlPlane) Initialize() error { - minReady := isMinReadyController(rc.Interface) controller, err := rc.BuildController() if err != nil { - rc.recordMinReadyDegradedOrLog(minReady, "MinReadyInitializeFailed", err) + rc.reportOperationFailed(rc.Interface, "MinReadyInitializeFailed", err) return err } - minReady = isMinReadyController(controller) + 
rc.bindMinReadyStatus(controller) // claim workload under our control err = controller.Initialize(rc.ctx, rc.release) if err != nil { - rc.recordMinReadyDegradedOrLog(minReady, "MinReadyInitializeFailed", err) return err } - if minReady { - rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyInitialized, "MinReadyInitialized", "MinReadySeconds strategy initialized") - } // record revision and replicas workloadInfo := controller.GetWorkloadInfo() @@ -102,30 +114,28 @@ func (rc *realBatchControlPlane) Initialize() error { } func (rc *realBatchControlPlane) UpgradeBatch() error { - minReady := isMinReadyController(rc.Interface) controller, err := rc.BuildController() if err != nil { - rc.recordMinReadyDegradedOrLog(minReady, "MinReadyBatchingFailed", err) + rc.reportOperationFailed(rc.Interface, "MinReadyBatchingFailed", err) return err } - minReady = isMinReadyController(controller) + rc.bindMinReadyStatus(controller) if controller.GetWorkloadInfo().Replicas == 0 { - if minReady { - rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatching", "MinReadySeconds strategy has no replicas to upgrade") + if lifecycle, ok := controller.(MinReadyLifecycle); ok { + lifecycle.RecordZeroReplicaBatching() } return nil } err = rc.countAndUpdateNoNeedUpdateReplicas() if err != nil { - rc.recordMinReadyDegradedOrLog(minReady, "MinReadyBatchingFailed", err) + rc.reportOperationFailed(controller, "MinReadyBatchingFailed", err) return err } batchContext, err := controller.CalculateBatchContext(rc.release) if err != nil { - rc.recordMinReadyDegradedOrLog(minReady, "MinReadyBatchingFailed", err) return err } klog.Infof("BatchRelease %v calculated context when upgrade batch: %s", @@ -133,32 +143,30 @@ func (rc *realBatchControlPlane) UpgradeBatch() error { err = controller.UpgradeBatch(rc.ctx, batchContext) if err != nil { - rc.recordMinReadyDegradedOrLog(minReady, "MinReadyBatchingFailed", err) return err } if err := 
rc.patcher.PatchPodBatchLabel(batchContext); err != nil { - rc.recordMinReadyDegradedOrLog(minReady, "MinReadyBatchingFailed", err) + rc.reportOperationFailed(controller, "MinReadyBatchingFailed", err) return err } - if minReady { - rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatching", "MinReadySeconds strategy advanced the current batch") + if lifecycle, ok := controller.(MinReadyLifecycle); ok { + lifecycle.RecordBatchAdvanced() } return nil } func (rc *realBatchControlPlane) EnsureBatchPodsReadyAndLabeled() error { - minReady := isMinReadyController(rc.Interface) controller, err := rc.BuildController() if err != nil { - rc.recordMinReadyDegradedOrLog(minReady, "MinReadyBatchingFailed", err) + rc.reportOperationFailed(rc.Interface, "MinReadyBatchingFailed", err) return err } - minReady = isMinReadyController(controller) + rc.bindMinReadyStatus(controller) if controller.GetWorkloadInfo().Replicas == 0 { - if minReady { - rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatchReady", "MinReadySeconds strategy batch is ready") + if lifecycle, ok := controller.(MinReadyLifecycle); ok { + lifecycle.RecordZeroReplicaBatchReady() } return nil } @@ -167,7 +175,6 @@ func (rc *realBatchControlPlane) EnsureBatchPodsReadyAndLabeled() error { // the target calculated should be consistent with UpgradeBatch. 
batchContext, err := controller.CalculateBatchContext(rc.release) if err != nil { - rc.recordMinReadyDegradedOrLog(minReady, "MinReadyBatchingFailed", err) return err } @@ -175,37 +182,32 @@ func (rc *realBatchControlPlane) EnsureBatchPodsReadyAndLabeled() error { klog.KObj(rc.release), batchContext.Log()) if err := batchContext.IsBatchReady(); err != nil { - if minReady { - observeMinReadyBatchWait(rc.release, util.GetBatchReleaseCondition(*rc.newStatus, v1beta1.RolloutConditionMinReadyBatching)) + if lifecycle, ok := controller.(MinReadyLifecycle); ok { + lifecycle.ObserveBatchWait() } return err } - if minReady { - rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatchReady", "MinReadySeconds strategy batch is ready") + if lifecycle, ok := controller.(MinReadyLifecycle); ok { + lifecycle.RecordBatchReady() } return nil } func (rc *realBatchControlPlane) Finalize() error { - minReady := isMinReadyController(rc.Interface) controller, err := rc.BuildController() if err != nil { if err := client.IgnoreNotFound(err); err != nil { - rc.recordMinReadyDegradedOrLog(minReady, "MinReadyFinalizeFailed", err) + rc.reportOperationFailed(rc.Interface, "MinReadyFinalizeFailed", err) return err } return nil } - minReady = isMinReadyController(controller) + rc.bindMinReadyStatus(controller) // release workload control info and clean up resources if it needs if err := controller.Finalize(rc.ctx, rc.release); err != nil { - rc.recordMinReadyDegradedOrLog(minReady, "MinReadyFinalizeFailed", err) return err } - if minReady { - rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyFinalized, "MinReadyFinalized", "MinReadySeconds strategy finalized") - } return nil } diff --git a/pkg/controller/batchrelease/control/partitionstyle/control_plane_test.go b/pkg/controller/batchrelease/control/partitionstyle/control_plane_test.go index a1f3ac99..f71adf57 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/control_plane_test.go +++ 
b/pkg/controller/batchrelease/control/partitionstyle/control_plane_test.go @@ -60,6 +60,8 @@ type fakePartitionController struct { finalizeCalls int calculateCalls int listCalls int + + statusWriter *MinReadyStatusWriter } func (f *fakePartitionController) BuildController() (Interface, error) { @@ -73,6 +75,54 @@ func (f *fakePartitionController) BuildController() (Interface, error) { return f, nil } +func (f *fakePartitionController) BindMinReadyStatus(release *v1beta1.BatchRelease, status *v1beta1.BatchReleaseStatus, recorder record.EventRecorder) { + if f.minReady { + f.statusWriter = NewMinReadyStatusWriter(release, status, recorder) + } +} + +func (f *fakePartitionController) RecordOperationFailed(reason string, err error) { + if f.statusWriter != nil { + f.statusWriter.RecordDegraded(reason, err) + } +} + +func (f *fakePartitionController) RecordZeroReplicaBatching() { + if f.statusWriter != nil { + f.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatching", "MinReadySeconds strategy has no replicas to upgrade") + } +} + +func (f *fakePartitionController) RecordBatchAdvanced() { + if f.statusWriter != nil { + f.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatching", "MinReadySeconds strategy advanced the current batch") + } +} + +func (f *fakePartitionController) RecordZeroReplicaBatchReady() { + if f.statusWriter != nil { + f.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatchReady", "MinReadySeconds strategy batch is ready") + } +} + +func (f *fakePartitionController) RecordBatchReady() { + if f.statusWriter != nil { + f.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatchReady", "MinReadySeconds strategy batch is ready") + } +} + +func (f *fakePartitionController) ObserveBatchWait() { + if f.statusWriter == nil { + return + } + status := f.statusWriter.BatchReleaseStatus() + if status == nil { + return + } + condition := 
util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyBatching) + ObserveMinReadyBatchWait(f.statusWriter.BatchRelease(), condition) +} + func (f *fakePartitionController) GetWorkloadInfo() *util.WorkloadInfo { if f.workloadInfo != nil { return f.workloadInfo @@ -85,9 +135,12 @@ func (f *fakePartitionController) ListOwnedPods() ([]*corev1.Pod, error) { return f.pods, f.listErr } -func (f *fakePartitionController) CalculateBatchContext(*v1beta1.BatchRelease) (*batchcontext.BatchContext, error) { +func (f *fakePartitionController) CalculateBatchContext(release *v1beta1.BatchRelease) (*batchcontext.BatchContext, error) { f.calculateCalls++ if f.calcErr != nil { + if f.statusWriter != nil { + f.statusWriter.RecordDegraded("MinReadyBatchingFailed", f.calcErr) + } return nil, f.calcErr } if f.batchCtx != nil { @@ -96,19 +149,43 @@ func (f *fakePartitionController) CalculateBatchContext(*v1beta1.BatchRelease) ( return readyBatchContext(), nil } -func (f *fakePartitionController) Initialize(context.Context, *v1beta1.BatchRelease) error { +func (f *fakePartitionController) Initialize(ctx context.Context, release *v1beta1.BatchRelease) error { f.initCalls++ - return f.initErr + if f.initErr != nil { + if f.statusWriter != nil { + f.statusWriter.RecordDegraded("MinReadyInitializeFailed", f.initErr) + } + return f.initErr + } + if f.statusWriter != nil { + f.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyInitialized, "MinReadyInitialized", "MinReadySeconds strategy initialized") + } + return nil } func (f *fakePartitionController) UpgradeBatch(context.Context, *batchcontext.BatchContext) error { f.upgradeCalls++ - return f.upgradeErr + if f.upgradeErr != nil { + if f.statusWriter != nil { + f.statusWriter.RecordDegraded("MinReadyBatchingFailed", f.upgradeErr) + } + return f.upgradeErr + } + return nil } func (f *fakePartitionController) Finalize(context.Context, *v1beta1.BatchRelease) error { f.finalizeCalls++ - return f.finalizeErr + if 
f.finalizeErr != nil { + if f.statusWriter != nil { + f.statusWriter.RecordDegraded("MinReadyFinalizeFailed", f.finalizeErr) + } + return f.finalizeErr + } + if f.statusWriter != nil { + f.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyFinalized, "MinReadyFinalized", "MinReadySeconds strategy finalized") + } + return nil } func (f *fakePartitionController) IsMinReadyControl() bool { diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go index 686a63f3..95b00996 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go @@ -19,11 +19,11 @@ package deployment import ( "context" "fmt" - "strings" apps "k8s.io/api/apps/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/tools/record" "k8s.io/klog/v2" "sigs.k8s.io/controller-runtime/pkg/client" @@ -38,12 +38,59 @@ import ( type MinReadyControl struct { *realController + statusWriter *partitionstyle.MinReadyStatusWriter } func (mc *MinReadyControl) IsMinReadyControl() bool { return true } +func (mc *MinReadyControl) BindMinReadyStatus(release *v1beta1.BatchRelease, status *v1beta1.BatchReleaseStatus, recorder record.EventRecorder) { + mc.statusWriter = partitionstyle.NewMinReadyStatusWriter(release, status, recorder) +} + +func (mc *MinReadyControl) RecordOperationFailed(reason string, err error) { + if mc.statusWriter != nil { + mc.statusWriter.RecordDegraded(reason, err) + } +} + +func (mc *MinReadyControl) RecordZeroReplicaBatching() { + if mc.statusWriter != nil { + mc.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatching", "MinReadySeconds strategy has no replicas to upgrade") + } +} + +func (mc *MinReadyControl) RecordBatchAdvanced() { + if mc.statusWriter != nil { + 
mc.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatching", "MinReadySeconds strategy advanced the current batch") + } +} + +func (mc *MinReadyControl) RecordZeroReplicaBatchReady() { + if mc.statusWriter != nil { + mc.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatchReady", "MinReadySeconds strategy batch is ready") + } +} + +func (mc *MinReadyControl) RecordBatchReady() { + if mc.statusWriter != nil { + mc.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatchReady", "MinReadySeconds strategy batch is ready") + } +} + +func (mc *MinReadyControl) ObserveBatchWait() { + if mc.statusWriter == nil { + return + } + status := mc.statusWriter.BatchReleaseStatus() + if status == nil { + return + } + condition := util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyBatching) + partitionstyle.ObserveMinReadyBatchWait(mc.statusWriter.BatchRelease(), condition) +} + func (mc *MinReadyControl) BuildController() (partitionstyle.Interface, error) { if mc.realController == nil { return nil, fmt.Errorf("MinReadyControl.BuildController: realController is nil") @@ -56,43 +103,54 @@ func (mc *MinReadyControl) BuildController() (partitionstyle.Interface, error) { if !ok { return nil, fmt.Errorf("MinReadyControl.BuildController: expected *realController, got %T", built) } - return &MinReadyControl{realController: rc}, nil + return &MinReadyControl{realController: rc, statusWriter: mc.statusWriter}, nil } func (mc *MinReadyControl) Initialize(ctx context.Context, release *v1beta1.BatchRelease) error { if release == nil { - return fmt.Errorf("MinReadyControl.Initialize: release is nil") + err := fmt.Errorf("MinReadyControl.Initialize: release is nil") + mc.RecordOperationFailed("MinReadyInitializeFailed", err) + return err } if err := mc.ensureInitializeAllowed(); err != nil { - return fmt.Errorf("MinReadyControl.Initialize: %w", err) + wrapped := 
fmt.Errorf("MinReadyControl.Initialize: %w", err) + mc.RecordOperationFailed("MinReadyInitializeFailed", wrapped) + return wrapped } original := mc.object modified := mc.object.DeepCopy() - if hasAnyOriginalAnnotation(original.Annotations) { - if err := ensureOriginalAnnotations(original); err != nil { - return fmt.Errorf("MinReadyControl.Initialize: %w", err) - } - if err := validateInflatedDeploymentStrategy(original); err != nil { - return fmt.Errorf("MinReadyControl.Initialize: %w", err) - } - } else { - writeOriginalAnnotations(original, modified) + if err := prepareOriginalAnnotations(original, modified); err != nil { + wrapped := fmt.Errorf("MinReadyControl.Initialize: %w", err) + mc.RecordOperationFailed("MinReadyInitializeFailed", wrapped) + return wrapped } modified.Annotations[util.BatchReleaseControlAnnotation] = util.DumpJSON(metav1.NewControllerRef( release, release.GetObjectKind().GroupVersionKind())) inflateDeploymentStrategy(modified) patch := client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{}) - return mc.client.Patch(ctx, modified, patch) + if err := mc.client.Patch(ctx, modified, patch); err != nil { + wrapped := fmt.Errorf("MinReadyControl.Initialize: %w", err) + mc.RecordOperationFailed("MinReadyInitializeFailed", wrapped) + return wrapped + } + if mc.statusWriter != nil { + mc.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyInitialized, "MinReadyInitialized", "MinReadySeconds strategy initialized") + } + return nil } func (mc *MinReadyControl) UpgradeBatch(ctx context.Context, batchContext *batchcontext.BatchContext) error { if err := mc.ensureInflatedDeploymentStrategy(ctx); err != nil { - return fmt.Errorf("MinReadyControl.UpgradeBatch[%d]: %w", batchContext.CurrentBatch, err) + wrapped := fmt.Errorf("MinReadyControl.UpgradeBatch[%d]: %w", batchContext.CurrentBatch, err) + mc.RecordOperationFailed("MinReadyBatchingFailed", wrapped) + return wrapped } current, err := intstr.GetScaledValueFromIntOrPercent( 
mc.object.Spec.Strategy.RollingUpdate.MaxUnavailable, int(batchContext.Replicas), true) if err != nil { - return fmt.Errorf("MinReadyControl.UpgradeBatch[%d]: %w", batchContext.CurrentBatch, err) + wrapped := fmt.Errorf("MinReadyControl.UpgradeBatch[%d]: %w", batchContext.CurrentBatch, err) + mc.RecordOperationFailed("MinReadyBatchingFailed", wrapped) + return wrapped } target := batchContext.DesiredUpdatedReplicas if int32(current) == target { @@ -102,16 +160,21 @@ func (mc *MinReadyControl) UpgradeBatch(ctx context.Context, batchContext *batch // maxUnavailable above the batch target is a legal state after a // scale-down (HPA or manual) and also self-heals external tampering; // converge it back to the target instead of reporting degraded drift. - klog.Warning(minReadyWarningS("MinReady maxUnavailable exceeds target, reducing", + warningS(nil, "MinReady maxUnavailable exceeds target, reducing", "batch", batchContext.CurrentBatch, "deployment", klog.KObj(mc.object), - "maxUnavailable", current, "target", target)) + "maxUnavailable", current, "target", target) } original := mc.object modified := mc.object.DeepCopy() maxUnavailable := intstr.FromInt(int(target)) modified.Spec.Strategy.RollingUpdate.MaxUnavailable = &maxUnavailable patch := client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{}) - return mc.client.Patch(ctx, modified, patch) + if err := mc.client.Patch(ctx, modified, patch); err != nil { + wrapped := fmt.Errorf("MinReadyControl.UpgradeBatch[%d]: %w", batchContext.CurrentBatch, err) + mc.RecordOperationFailed("MinReadyBatchingFailed", wrapped) + return wrapped + } + return nil } func (mc *MinReadyControl) Finalize(ctx context.Context, _ *v1beta1.BatchRelease) error { @@ -120,15 +183,19 @@ func (mc *MinReadyControl) Finalize(ctx context.Context, _ *v1beta1.BatchRelease } if !hasAnyOriginalAnnotation(mc.object.Annotations) { if hasInflatedDeploymentFields(mc.object) { - return fmt.Errorf("MinReadyControl.Finalize: annotation state 
missing while deployment fields are still inflated: %w", + err := fmt.Errorf("MinReadyControl.Finalize: annotation state missing while deployment fields are still inflated: %w", partitionstyle.ErrMinReadyAnnotationInvalid) + mc.RecordOperationFailed("MinReadyFinalizeFailed", err) + return err } return nil } original := mc.object restored, err := parseOriginalDeploymentStrategy(original.Annotations) if err != nil { - return fmt.Errorf("MinReadyControl.Finalize: %w", err) + wrapped := fmt.Errorf("MinReadyControl.Finalize: %w", err) + mc.RecordOperationFailed("MinReadyFinalizeFailed", wrapped) + return wrapped } modified := mc.object.DeepCopy() applyOriginalDeploymentStrategy(modified, restored) @@ -138,14 +205,24 @@ func (mc *MinReadyControl) Finalize(ctx context.Context, _ *v1beta1.BatchRelease delete(modified.Annotations, util.BatchReleaseControlAnnotation) delete(modified.Labels, v1alpha1.DeploymentStableRevisionLabel) patch := client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{}) - return mc.client.Patch(ctx, modified, patch) + if err := mc.client.Patch(ctx, modified, patch); err != nil { + wrapped := fmt.Errorf("MinReadyControl.Finalize: %w", err) + mc.RecordOperationFailed("MinReadyFinalizeFailed", wrapped) + return wrapped + } + if mc.statusWriter != nil { + mc.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyFinalized, "MinReadyFinalized", "MinReadySeconds strategy finalized") + } + return nil } func (mc *MinReadyControl) CalculateBatchContext(release *v1beta1.BatchRelease) (*batchcontext.BatchContext, error) { rolloutID := release.Spec.ReleasePlan.RolloutID if rolloutID != "" { if _, err := mc.ListOwnedPods(); err != nil { - return nil, fmt.Errorf("MinReadyControl.CalculateBatchContext: %w", err) + wrapped := fmt.Errorf("MinReadyControl.CalculateBatchContext: %w", err) + mc.RecordOperationFailed("MinReadyBatchingFailed", wrapped) + return nil, wrapped } } @@ -153,11 +230,15 @@ func (mc *MinReadyControl) 
CalculateBatchContext(release *v1beta1.BatchRelease) desiredPartition := release.Spec.ReleasePlan.Batches[currentBatch].CanaryReplicas desiredUpdatedReplicas, err := minReadyDesiredUpdatedReplicas(desiredPartition, mc.object) if err != nil { - return nil, fmt.Errorf("MinReadyControl.CalculateBatchContext: %w", err) + wrapped := fmt.Errorf("MinReadyControl.CalculateBatchContext: %w", err) + mc.RecordOperationFailed("MinReadyBatchingFailed", wrapped) + return nil, wrapped } updatedReadyReplicas, err := mc.minReadyUpdatedReadyReplicas(release.Status.UpdateRevision) if err != nil { - return nil, fmt.Errorf("MinReadyControl.CalculateBatchContext: %w", err) + wrapped := fmt.Errorf("MinReadyControl.CalculateBatchContext: %w", err) + mc.RecordOperationFailed("MinReadyBatchingFailed", wrapped) + return nil, wrapped } return &batchcontext.BatchContext{ RolloutID: rolloutID, @@ -187,6 +268,17 @@ func (mc *MinReadyControl) ensureInitializeAllowed() error { return nil } +func prepareOriginalAnnotations(deployment, writeTarget *apps.Deployment) error { + if !hasAnyOriginalAnnotation(deployment.Annotations) { + writeOriginalAnnotations(deployment, writeTarget) + return nil + } + if err := ensureOriginalAnnotations(deployment); err != nil { + return err + } + return validateInflatedDeploymentStrategy(deployment) +} + func ensureOriginalAnnotations(deployment *apps.Deployment) error { _, err := parseOriginalDeploymentStrategy(deployment.Annotations) return err @@ -241,23 +333,30 @@ func EnrollMinReadyDeployment(deployment *apps.Deployment) error { return err } snapshot := deployment.DeepCopy() - if hasAnyOriginalAnnotation(snapshot.Annotations) { - if err := ensureOriginalAnnotations(snapshot); err != nil { + if err := enrollOriginalAnnotations(snapshot, deployment); err != nil { + return err + } + inflateDeploymentStrategy(deployment) + return nil +} + +func enrollOriginalAnnotations(snapshot, target *apps.Deployment) error { + if !hasAnyOriginalAnnotation(snapshot.Annotations) { 
+ writeOriginalAnnotations(snapshot, target) + return nil + } + if err := ensureOriginalAnnotations(snapshot); err != nil { + return err + } + if err := validateInflatedDeploymentStrategy(snapshot); err != nil { + if !hasOriginalAvailabilityChange(snapshot) { return err } - if err := validateInflatedDeploymentStrategy(snapshot); err != nil { - if !hasOriginalAvailabilityChange(snapshot) { - return err - } - if err := validateMinReadyRefreshableDeployment(snapshot); err != nil { - return err - } - writeOriginalAvailabilityAnnotations(snapshot, deployment) + if err := validateMinReadyRefreshableDeployment(snapshot); err != nil { + return err } - } else { - writeOriginalAnnotations(snapshot, deployment) + writeOriginalAvailabilityAnnotations(snapshot, target) } - inflateDeploymentStrategy(deployment) return nil } @@ -382,18 +481,8 @@ func applyOriginalDeploymentStrategy(deployment *apps.Deployment, original *orig // EventDegradedDriftDetected is the warning event reason recorded when // external drift of the inflated fields is detected. It equals the sentinel // error text so events, metrics and errors.Is classification stay in sync. -// minReadyWarningS formats structured key-value pairs for warning logs. The -// project's klog v2.120.1 does not expose WarningS; keep the same call shape -// used elsewhere (ErrorS/InfoS) while emitting at warning severity. 
-func minReadyWarningS(msg string, keysAndValues ...interface{}) string { - var b strings.Builder - b.WriteString(msg) - for i := 0; i+1 < len(keysAndValues); i += 2 { - fmt.Fprintf(&b, " %v=%v", keysAndValues[i], keysAndValues[i+1]) - } - return b.String() -} - var EventDegradedDriftDetected = partitionstyle.ErrMinReadyDriftDetected.Error() var _ partitionstyle.Interface = (*MinReadyControl)(nil) +var _ partitionstyle.MinReadyStatusBinder = (*MinReadyControl)(nil) +var _ partitionstyle.MinReadyLifecycle = (*MinReadyControl)(nil) diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_log.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_log.go new file mode 100644 index 00000000..8f4cdf63 --- /dev/null +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_log.go @@ -0,0 +1,42 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package deployment + +import ( + "fmt" + "strings" + + "k8s.io/klog/v2" +) + +// warningS logs at warning severity with the same call shape as klog.InfoS/ErrorS. +// klog v2.120.1 does not expose WarningS, so MinReady uses this helper locally. 
+func warningS(err error, msg string, keysAndValues ...interface{}) { + klog.Warning(formatStructuredLog(err, msg, keysAndValues...)) +} + +func formatStructuredLog(err error, msg string, keysAndValues ...interface{}) string { + var b strings.Builder + b.WriteString(msg) + if err != nil { + fmt.Fprintf(&b, " err=%v", err) + } + for i := 0; i+1 < len(keysAndValues); i += 2 { + fmt.Fprintf(&b, " %v=%v", keysAndValues[i], keysAndValues[i+1]) + } + return b.String() +} diff --git a/pkg/controller/batchrelease/control/partitionstyle/minready_status.go b/pkg/controller/batchrelease/control/partitionstyle/minready_status.go index 1a71812c..b2db8c05 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/minready_status.go +++ b/pkg/controller/batchrelease/control/partitionstyle/minready_status.go @@ -21,48 +21,98 @@ import ( "time" v1 "k8s.io/api/core/v1" - "k8s.io/klog/v2" + "k8s.io/client-go/tools/record" "github.com/openkruise/rollouts/api/v1beta1" brmetrics "github.com/openkruise/rollouts/pkg/controller/batchrelease/metrics" "github.com/openkruise/rollouts/pkg/util" ) -type minReadyControllerMarker interface { - IsMinReadyControl() bool +// MinReadyStatusBinder injects BatchRelease status/event dependencies into +// MinReadyControl before lifecycle methods run. +type MinReadyStatusBinder interface { + BindMinReadyStatus(release *v1beta1.BatchRelease, status *v1beta1.BatchReleaseStatus, recorder record.EventRecorder) } -func isMinReadyController(controller Interface) bool { - marker, ok := controller.(minReadyControllerMarker) - return ok && marker.IsMinReadyControl() +// MinReadyLifecycle records MinReady-specific status from control-plane batch +// paths that are not Initialize/UpgradeBatch/Finalize. 
+type MinReadyLifecycle interface { + RecordZeroReplicaBatching() + RecordBatchAdvanced() + RecordZeroReplicaBatchReady() + RecordBatchReady() + ObserveBatchWait() + RecordOperationFailed(reason string, err error) } -func (rc *realBatchControlPlane) recordMinReadyDegradedOrLog(minReady bool, reason string, err error) { - if minReady { - rc.recordMinReadyDegraded(reason, err) - return +type MinReadyStatusWriter struct { + release *v1beta1.BatchRelease + status *v1beta1.BatchReleaseStatus + recorder record.EventRecorder +} + +func NewMinReadyStatusWriter(release *v1beta1.BatchRelease, status *v1beta1.BatchReleaseStatus, recorder record.EventRecorder) *MinReadyStatusWriter { + return &MinReadyStatusWriter{ + release: release, + status: status, + recorder: recorder, + } +} + +func (w *MinReadyStatusWriter) BatchRelease() *v1beta1.BatchRelease { + if w == nil { + return nil } - if err != nil { - klog.ErrorS(err, "Partition-style control plane failed", "release", klog.KObj(rc.release), "reason", reason) + return w.release +} + +func (w *MinReadyStatusWriter) BatchReleaseStatus() *v1beta1.BatchReleaseStatus { + if w == nil { + return nil } + return w.status } -func (rc *realBatchControlPlane) recordMinReadyNormal(condType v1beta1.RolloutConditionType, reason, message string) { - previousCondition := util.GetBatchReleaseCondition(*rc.newStatus, condType) +func (w *MinReadyStatusWriter) RecordNormal(condType v1beta1.RolloutConditionType, reason, message string) { + if w == nil || w.status == nil { + return + } + previousCondition := util.GetBatchReleaseCondition(*w.status, condType) condition := util.NewRolloutCondition(condType, v1.ConditionTrue, reason, message) - util.SetBatchReleaseCondition(rc.newStatus, *condition) + util.SetBatchReleaseCondition(w.status, *condition) if reason == "MinReadyFinalized" { - clearMinReadyDegraded(rc.newStatus) - rc.newStatus.Message = "" + clearMinReadyDegraded(w.status) + w.status.Message = "" } if reason == "MinReadyBatchReady" { - 
observeMinReadyBatchDuration(rc.release, previousCondition) - brmetrics.RecordMinReadyBatch(rc.release, brmetrics.BatchResultSuccess) + observeMinReadyBatchDuration(w.release, previousCondition) + brmetrics.RecordMinReadyBatch(w.release, brmetrics.BatchResultSuccess) } if reason == "MinReadyBatchReady" || reason == "MinReadyFinalized" { - brmetrics.ClearMinReadyStuckSeconds(rc.release, brmetrics.StuckReasonBatchReadyTimeout) + brmetrics.ClearMinReadyStuckSeconds(w.release, brmetrics.StuckReasonBatchReadyTimeout) + } + if w.recorder != nil && w.release != nil { + w.recorder.Event(w.release, v1.EventTypeNormal, reason, message) + } +} + +func (w *MinReadyStatusWriter) RecordDegraded(reason string, err error) { + if w == nil || w.status == nil || err == nil { + return + } + message := err.Error() + classified := classifyMinReadyDegradedReason(reason, err) + eventReason := classified.event + condition := util.NewRolloutCondition(v1beta1.RolloutConditionMinReadyDegraded, v1.ConditionTrue, eventReason, message) + util.SetBatchReleaseCondition(w.status, *condition) + w.status.Message = message + degradedReason := classified.metric + brmetrics.ClearMinReadyStuckSeconds(w.release, brmetrics.StuckReasonBatchReadyTimeout) + brmetrics.RecordMinReadyBatch(w.release, brmetrics.BatchResultDegraded) + brmetrics.RecordMinReadyDegraded(w.release, degradedReason) + if w.recorder != nil && w.release != nil { + w.recorder.Event(w.release, v1.EventTypeWarning, eventReason, message) } - rc.Event(rc.release, v1.EventTypeNormal, reason, message) } func observeMinReadyBatchDuration(release *v1beta1.BatchRelease, condition *v1beta1.RolloutCondition) { @@ -76,24 +126,8 @@ func observeMinReadyBatchDuration(release *v1beta1.BatchRelease, condition *v1be brmetrics.ObserveMinReadyBatchDuration(release, duration) } -func (rc *realBatchControlPlane) recordMinReadyDegraded(reason string, err error) { - if err == nil { - return - } - message := err.Error() - classified := 
classifyMinReadyDegradedReason(reason, err) - eventReason := classified.event - condition := util.NewRolloutCondition(v1beta1.RolloutConditionMinReadyDegraded, v1.ConditionTrue, eventReason, message) - util.SetBatchReleaseCondition(rc.newStatus, *condition) - rc.newStatus.Message = message - degradedReason := classified.metric - brmetrics.ClearMinReadyStuckSeconds(rc.release, brmetrics.StuckReasonBatchReadyTimeout) - brmetrics.RecordMinReadyBatch(rc.release, brmetrics.BatchResultDegraded) - brmetrics.RecordMinReadyDegraded(rc.release, degradedReason) - rc.Event(rc.release, v1.EventTypeWarning, eventReason, message) -} - -func observeMinReadyBatchWait(release *v1beta1.BatchRelease, condition *v1beta1.RolloutCondition) { +// ObserveMinReadyBatchWait updates the stuck-seconds metric while a batch waits to become ready. +func ObserveMinReadyBatchWait(release *v1beta1.BatchRelease, condition *v1beta1.RolloutCondition) { if release == nil || condition == nil || condition.LastTransitionTime.IsZero() { return } diff --git a/pkg/controller/batchrelease/control/partitionstyle/minready_status_test.go b/pkg/controller/batchrelease/control/partitionstyle/minready_status_test.go index 43195119..8e3fd5e9 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/minready_status_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/minready_status_test.go @@ -56,13 +56,13 @@ func TestRecordMinReadyNormalObservesBatchDuration(t *testing.T) { LastUpdateTime: startedAt, }) - rc := &realBatchControlPlane{ - EventRecorder: record.NewFakeRecorder(1), - release: release, - newStatus: status, + rc := &MinReadyStatusWriter{ + release: release, + status: status, + recorder: record.NewFakeRecorder(1), } - rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatchReady", "MinReadySeconds strategy batch is ready") + rc.RecordNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatchReady", "MinReadySeconds strategy batch is ready") histogram := 
findHistogramMetric(t, "rollout_minready_batch_duration_seconds", map[string]string{ "rollout": release.Name, @@ -91,13 +91,13 @@ func TestRecordMinReadyNormalKeepsDegradedUntilFinalize(t *testing.T) { Status: v1.ConditionTrue, Reason: "MinReadyDegradedMissingAnnotations", }) - rc := &realBatchControlPlane{ - EventRecorder: record.NewFakeRecorder(2), - release: release, - newStatus: status, + rc := &MinReadyStatusWriter{ + release: release, + status: status, + recorder: record.NewFakeRecorder(2), } - rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatching", "MinReadySeconds strategy advanced the current batch") + rc.RecordNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatching", "MinReadySeconds strategy advanced the current batch") degraded := util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyDegraded) if degraded == nil || degraded.Status != v1.ConditionTrue { @@ -107,7 +107,7 @@ func TestRecordMinReadyNormalKeepsDegradedUntilFinalize(t *testing.T) { t.Fatalf("status.message = %q, want previous degraded message", status.Message) } - rc.recordMinReadyNormal(v1beta1.RolloutConditionMinReadyFinalized, "MinReadyFinalized", "MinReadySeconds strategy finalized") + rc.RecordNormal(v1beta1.RolloutConditionMinReadyFinalized, "MinReadyFinalized", "MinReadySeconds strategy finalized") degraded = util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyDegraded) if degraded == nil || degraded.Status != v1.ConditionFalse { @@ -132,7 +132,7 @@ func TestObserveMinReadyBatchWaitSetsStuckGauge(t *testing.T) { LastUpdateTime: startedAt, } - observeMinReadyBatchWait(release, condition) + ObserveMinReadyBatchWait(release, condition) gauge := findGaugeMetric(t, "rollout_minready_stuck_seconds", map[string]string{ "rollout": release.Name, From eb6eda21e2bc633910cb7b1d218e8e4e95e662c1 Mon Sep 17 00:00:00 2001 From: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> Date: Tue, 16 Jun 2026 01:02:12 +0800 
Subject: [PATCH 17/22] fix: heal maxUnavailable drift while batch release waits paused Converge external maxUnavailable tampering during EnsureBatchPodsReadyAndLabeled so TC7 does not depend on UpgradeBatch running only after rollout resume. Signed-off-by: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> Co-authored-by: Cursor --- .../control/partitionstyle/control_plane.go | 6 +++ .../deployment/minready_control.go | 37 ++++++++++++++++++- .../deployment/minready_control_test.go | 23 ++++++++++++ .../control/partitionstyle/minready_status.go | 9 +++++ test/e2e/minready/deployment_minready_test.go | 2 + 5 files changed, 75 insertions(+), 2 deletions(-) diff --git a/pkg/controller/batchrelease/control/partitionstyle/control_plane.go b/pkg/controller/batchrelease/control/partitionstyle/control_plane.go index 74a00c04..442dcaeb 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/control_plane.go +++ b/pkg/controller/batchrelease/control/partitionstyle/control_plane.go @@ -181,6 +181,12 @@ func (rc *realBatchControlPlane) EnsureBatchPodsReadyAndLabeled() error { klog.Infof("BatchRelease %v calculated context when check batch ready: %s", klog.KObj(rc.release), batchContext.Log()) + if reconciler, ok := controller.(MinReadyDriftReconciler); ok { + if err := reconciler.ReconcileMaxUnavailableDrift(rc.ctx, batchContext); err != nil { + return err + } + } + if err := batchContext.IsBatchReady(); err != nil { if lifecycle, ok := controller.(MinReadyLifecycle); ok { lifecycle.ObserveBatchWait() diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go index 95b00996..de1118c7 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go @@ -145,10 +145,28 @@ func (mc *MinReadyControl) UpgradeBatch(ctx context.Context, 
batchContext *batch mc.RecordOperationFailed("MinReadyBatchingFailed", wrapped) return wrapped } + return mc.reconcileMaxUnavailable(ctx, batchContext) +} + +func (mc *MinReadyControl) ReconcileMaxUnavailableDrift(ctx context.Context, batchContext *batchcontext.BatchContext) error { + if err := mc.ensureInflatedDeploymentStrategy(ctx); err != nil { + wrapped := fmt.Errorf("MinReadyControl.ReconcileMaxUnavailableDrift[%d]: %w", batchContext.CurrentBatch, err) + mc.RecordOperationFailed("MinReadyBatchingFailed", wrapped) + return wrapped + } + return mc.reconcileMaxUnavailable(ctx, batchContext) +} + +func (mc *MinReadyControl) reconcileMaxUnavailable(ctx context.Context, batchContext *batchcontext.BatchContext) error { + if err := mc.refreshDeployment(ctx); err != nil { + wrapped := fmt.Errorf("MinReadyControl.reconcileMaxUnavailable[%d]: %w", batchContext.CurrentBatch, err) + mc.RecordOperationFailed("MinReadyBatchingFailed", wrapped) + return wrapped + } current, err := intstr.GetScaledValueFromIntOrPercent( mc.object.Spec.Strategy.RollingUpdate.MaxUnavailable, int(batchContext.Replicas), true) if err != nil { - wrapped := fmt.Errorf("MinReadyControl.UpgradeBatch[%d]: %w", batchContext.CurrentBatch, err) + wrapped := fmt.Errorf("MinReadyControl.reconcileMaxUnavailable[%d]: %w", batchContext.CurrentBatch, err) mc.RecordOperationFailed("MinReadyBatchingFailed", wrapped) return wrapped } @@ -170,10 +188,24 @@ func (mc *MinReadyControl) UpgradeBatch(ctx context.Context, batchContext *batch modified.Spec.Strategy.RollingUpdate.MaxUnavailable = &maxUnavailable patch := client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{}) if err := mc.client.Patch(ctx, modified, patch); err != nil { - wrapped := fmt.Errorf("MinReadyControl.UpgradeBatch[%d]: %w", batchContext.CurrentBatch, err) + wrapped := fmt.Errorf("MinReadyControl.reconcileMaxUnavailable[%d]: %w", batchContext.CurrentBatch, err) mc.RecordOperationFailed("MinReadyBatchingFailed", wrapped) return 
wrapped } + mc.object = modified + return nil +} + +func (mc *MinReadyControl) refreshDeployment(ctx context.Context) error { + if mc.realController == nil { + return fmt.Errorf("deployment is not loaded") + } + object := &apps.Deployment{} + if err := mc.client.Get(ctx, mc.key, object); err != nil { + return err + } + mc.object = object + mc.WorkloadInfo = mc.getWorkloadInfo(object) return nil } @@ -486,3 +518,4 @@ var EventDegradedDriftDetected = partitionstyle.ErrMinReadyDriftDetected.Error() var _ partitionstyle.Interface = (*MinReadyControl)(nil) var _ partitionstyle.MinReadyStatusBinder = (*MinReadyControl)(nil) var _ partitionstyle.MinReadyLifecycle = (*MinReadyControl)(nil) +var _ partitionstyle.MinReadyDriftReconciler = (*MinReadyControl)(nil) diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go index 30cd8cfb..6ebaabd0 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go @@ -275,6 +275,29 @@ func TestMinReadyUpgradeBatchRestoresInflatedStrategyFields(t *testing.T) { } } +func TestMinReadyReconcileMaxUnavailableDriftConvergesExternalTampering(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newInflatedMinReadyDeployment() + addMinReadyOriginalAnnotations(deployment) + maxUnavailable := intstr.FromInt(5) + deployment.Spec.Strategy.RollingUpdate.MaxUnavailable = &maxUnavailable + control := newBuiltMinReadyControl(t, deployment) + ctx := &batchcontext.BatchContext{ + CurrentBatch: 0, + Replicas: 5, + DesiredUpdatedReplicas: 1, + } + + if err := control.ReconcileMaxUnavailableDrift(context.Background(), ctx); err != nil { + t.Fatalf("ReconcileMaxUnavailableDrift failed: %v", err) + } + + got := 
fetchMinReadyDeployment(t, control) + if value := minReadyMaxUnavailableValue(t, got, 5); value != 1 { + t.Fatalf("maxUnavailable = %d, want 1 (converged while batch is ready)", value) + } +} + func TestMinReadyUpgradeBatchConvergesMaxUnavailableOnScaleDown(t *testing.T) { // P1-2: after a scale-down (HPA or manual) the previously-set integer // maxUnavailable can exceed the new batch target. This is a legal state, not diff --git a/pkg/controller/batchrelease/control/partitionstyle/minready_status.go b/pkg/controller/batchrelease/control/partitionstyle/minready_status.go index b2db8c05..15cc7c0d 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/minready_status.go +++ b/pkg/controller/batchrelease/control/partitionstyle/minready_status.go @@ -17,6 +17,7 @@ limitations under the License. package partitionstyle import ( + "context" "errors" "time" @@ -24,6 +25,7 @@ import ( "k8s.io/client-go/tools/record" "github.com/openkruise/rollouts/api/v1beta1" + batchcontext "github.com/openkruise/rollouts/pkg/controller/batchrelease/context" brmetrics "github.com/openkruise/rollouts/pkg/controller/batchrelease/metrics" "github.com/openkruise/rollouts/pkg/util" ) @@ -45,6 +47,13 @@ type MinReadyLifecycle interface { RecordOperationFailed(reason string, err error) } +// MinReadyDriftReconciler converges inflated maxUnavailable back to the active +// batch target. EnsureBatchPodsReadyAndLabeled calls it so external drift is +// healed even while BatchRelease waits in ReadyBatchState for rollout resume. 
+type MinReadyDriftReconciler interface { + ReconcileMaxUnavailableDrift(ctx context.Context, batchContext *batchcontext.BatchContext) error +} + type MinReadyStatusWriter struct { release *v1beta1.BatchRelease status *v1beta1.BatchReleaseStatus diff --git a/test/e2e/minready/deployment_minready_test.go b/test/e2e/minready/deployment_minready_test.go index 696e9f62..92a313cc 100644 --- a/test/e2e/minready/deployment_minready_test.go +++ b/test/e2e/minready/deployment_minready_test.go @@ -131,6 +131,8 @@ var _ = SIGDescribe("Deployment MinReadySeconds", func() { waitMinReadyE2ERolloutStepPaused(namespace, rollout.Name, 1) patchMinReadyE2EMaxUnavailable(namespace, 5) + // Heal drift to the paused step's batch target (20% on 5 replicas => 1). + expectMinReadyE2EInflatedMaxUnavailable(namespace, 1) resumeMinReadyE2ERollout(namespace, rollout.Name) expectMinReadyE2EInflatedMaxUnavailable(namespace, 3) From 9485d09ca7703b70ca7e502938d66bedfa4aec6a Mon Sep 17 00:00:00 2001 From: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> Date: Tue, 16 Jun 2026 14:03:18 +0800 Subject: [PATCH 18/22] ci: retrigger workflows; fix TC7 flake and DCO sign-off Wait for step 2 pause before asserting post-resume maxUnavailable in TC7 so UpgradeBatch has applied the 60% batch target. 
Signed-off-by: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> Co-authored-by: Cursor Signed-off-by: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> Co-authored-by: Cursor --- test/e2e/minready/deployment_minready_helpers_test.go | 4 ++-- .../minready/deployment_minready_scenarios_helper_test.go | 6 +++++- test/e2e/minready/deployment_minready_test.go | 4 ++-- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/test/e2e/minready/deployment_minready_helpers_test.go b/test/e2e/minready/deployment_minready_helpers_test.go index 06996624..28ecb40f 100644 --- a/test/e2e/minready/deployment_minready_helpers_test.go +++ b/test/e2e/minready/deployment_minready_helpers_test.go @@ -148,7 +148,7 @@ func resumeMinReadyE2ERollout(namespace, name string) { resumedStep = rollout.Status.CanaryStatus.CurrentStepIndex body := fmt.Sprintf(`{"status":{"canaryStatus":{"currentStepState":"%s"}}}`, v1beta1.CanaryStepStateReady) return k8sClient.Status().Patch(context.TODO(), rollout, client.RawPatch(types.MergePatchType, []byte(body))) == nil - }, 2*time.Minute, time.Second).Should(BeTrue()) + }, 5*time.Minute, time.Second).Should(BeTrue()) if resumedStep < 0 { return } @@ -162,7 +162,7 @@ func resumeMinReadyE2ERollout(namespace, name string) { return rollout.Status.CanaryStatus != nil && (rollout.Status.CanaryStatus.CurrentStepIndex != resumedStep || rollout.Status.CanaryStatus.CurrentStepState != v1beta1.CanaryStepStatePaused) - }, 2*time.Minute, time.Second).Should(BeTrue()) + }, 5*time.Minute, time.Second).Should(BeTrue()) } func completeMinReadyE2ERollout(namespace, name string) { diff --git a/test/e2e/minready/deployment_minready_scenarios_helper_test.go b/test/e2e/minready/deployment_minready_scenarios_helper_test.go index 68b574c8..47cc6fbf 100644 --- a/test/e2e/minready/deployment_minready_scenarios_helper_test.go +++ b/test/e2e/minready/deployment_minready_scenarios_helper_test.go @@ -151,13 +151,17 @@ func intstrFromStringPtr(value string) 
*intstr.IntOrString { } func expectMinReadyE2EInflatedMaxUnavailable(namespace string, want int32) { + waitMinReadyE2EInflatedMaxUnavailable(namespace, want, 5*time.Minute) +} + +func waitMinReadyE2EInflatedMaxUnavailable(namespace string, want int32, timeout time.Duration) { Eventually(func() bool { deployment := &apps.Deployment{} key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} Expect(k8sClient.Get(context.TODO(), key, deployment)).Should(Succeed()) got := deployment.Spec.Strategy.RollingUpdate.MaxUnavailable return got != nil && got.IntVal == want - }, 5*time.Minute, time.Second).Should(BeTrue(), fmt.Sprintf("want maxUnavailable %d", want)) + }, timeout, time.Second).Should(BeTrue(), fmt.Sprintf("want maxUnavailable %d", want)) } func expectMinReadyE2EOriginalAnnotationAbsent(namespace string) { diff --git a/test/e2e/minready/deployment_minready_test.go b/test/e2e/minready/deployment_minready_test.go index 92a313cc..b95d8afe 100644 --- a/test/e2e/minready/deployment_minready_test.go +++ b/test/e2e/minready/deployment_minready_test.go @@ -134,8 +134,8 @@ var _ = SIGDescribe("Deployment MinReadySeconds", func() { // Heal drift to the paused step's batch target (20% on 5 replicas => 1). expectMinReadyE2EInflatedMaxUnavailable(namespace, 1) resumeMinReadyE2ERollout(namespace, rollout.Name) - - expectMinReadyE2EInflatedMaxUnavailable(namespace, 3) + // 50% batch target is also 3 on 5 replicas; wait for UpgradeBatch, not step 2 pause. 
+ waitMinReadyE2EInflatedMaxUnavailable(namespace, 3, 10*time.Minute) finishMinReadyE2ERollout(namespace, rollout.Name) }) From 6eae7fffeb2bf0bd4fb0cc78cdc92701e32f0b2e Mon Sep 17 00:00:00 2001 From: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> Date: Sat, 20 Jun 2026 20:37:07 +0800 Subject: [PATCH 19/22] feat: MinReady sliding window + review follow-ups (P0-3, P1-7, P1-8) Address openkruise/rollouts#343 review comments: P0-3: implement sliding window in reconcileMaxUnavailable so a large batch target (e.g. 99 after a 1-pod canary) no longer writes maxUnavailable in a single patch. Advance by the user's original maxUnavailable step, waiting for the current window pods to become available (UpdatedReadyReplicas >= current) before widening the budget. maxUnavailable=0 (surge-only) falls back to driving the batch directly. P1-7: converge condition/event/metrics reporting to the outer control_plane trunk via MinReadyLifecycle; lower-layer methods now only return errors instead of recording conditions themselves. P1-8: replace DeletionTimestamp check with util.IsPodActive in countUpdatedAvailablePods, mirroring upstream kubernetes controller_utils.go. Add 3 sliding-window unit tests; all 8 affected packages pass. 
Signed-off-by: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> --- .../control/partitionstyle/control_plane.go | 12 ++ .../partitionstyle/control_plane_test.go | 45 ++---- .../deployment/minready_batch_context.go | 2 +- .../deployment/minready_control.go | 142 ++++++++++-------- .../deployment/minready_control_test.go | 97 +++++++++++- .../control/partitionstyle/minready_status.go | 2 + pkg/util/pod_utils.go | 9 ++ 7 files changed, 215 insertions(+), 94 deletions(-) diff --git a/pkg/controller/batchrelease/control/partitionstyle/control_plane.go b/pkg/controller/batchrelease/control/partitionstyle/control_plane.go index 442dcaeb..6b63624a 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/control_plane.go +++ b/pkg/controller/batchrelease/control/partitionstyle/control_plane.go @@ -96,8 +96,12 @@ func (rc *realBatchControlPlane) Initialize() error { // claim workload under our control err = controller.Initialize(rc.ctx, rc.release) if err != nil { + rc.reportOperationFailed(controller, "MinReadyInitializeFailed", err) return err } + if lifecycle, ok := controller.(MinReadyLifecycle); ok { + lifecycle.RecordInitialized() + } // record revision and replicas workloadInfo := controller.GetWorkloadInfo() @@ -136,6 +140,7 @@ func (rc *realBatchControlPlane) UpgradeBatch() error { batchContext, err := controller.CalculateBatchContext(rc.release) if err != nil { + rc.reportOperationFailed(controller, "MinReadyBatchingFailed", err) return err } klog.Infof("BatchRelease %v calculated context when upgrade batch: %s", @@ -143,6 +148,7 @@ func (rc *realBatchControlPlane) UpgradeBatch() error { err = controller.UpgradeBatch(rc.ctx, batchContext) if err != nil { + rc.reportOperationFailed(controller, "MinReadyBatchingFailed", err) return err } @@ -175,6 +181,7 @@ func (rc *realBatchControlPlane) EnsureBatchPodsReadyAndLabeled() error { // the target calculated should be consistent with UpgradeBatch. 
batchContext, err := controller.CalculateBatchContext(rc.release) if err != nil { + rc.reportOperationFailed(controller, "MinReadyBatchingFailed", err) return err } @@ -183,6 +190,7 @@ func (rc *realBatchControlPlane) EnsureBatchPodsReadyAndLabeled() error { if reconciler, ok := controller.(MinReadyDriftReconciler); ok { if err := reconciler.ReconcileMaxUnavailableDrift(rc.ctx, batchContext); err != nil { + rc.reportOperationFailed(controller, "MinReadyBatchingFailed", err) return err } } @@ -212,8 +220,12 @@ func (rc *realBatchControlPlane) Finalize() error { // release workload control info and clean up resources if it needs if err := controller.Finalize(rc.ctx, rc.release); err != nil { + rc.reportOperationFailed(controller, "MinReadyFinalizeFailed", err) return err } + if lifecycle, ok := controller.(MinReadyLifecycle); ok { + lifecycle.RecordFinalized() + } return nil } diff --git a/pkg/controller/batchrelease/control/partitionstyle/control_plane_test.go b/pkg/controller/batchrelease/control/partitionstyle/control_plane_test.go index f71adf57..30ece0c4 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/control_plane_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/control_plane_test.go @@ -111,6 +111,18 @@ func (f *fakePartitionController) RecordBatchReady() { } } +func (f *fakePartitionController) RecordInitialized() { + if f.statusWriter != nil { + f.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyInitialized, "MinReadyInitialized", "MinReadySeconds strategy initialized") + } +} + +func (f *fakePartitionController) RecordFinalized() { + if f.statusWriter != nil { + f.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyFinalized, "MinReadyFinalized", "MinReadySeconds strategy finalized") + } +} + func (f *fakePartitionController) ObserveBatchWait() { if f.statusWriter == nil { return @@ -138,9 +150,6 @@ func (f *fakePartitionController) ListOwnedPods() ([]*corev1.Pod, error) { func (f *fakePartitionController) 
CalculateBatchContext(release *v1beta1.BatchRelease) (*batchcontext.BatchContext, error) { f.calculateCalls++ if f.calcErr != nil { - if f.statusWriter != nil { - f.statusWriter.RecordDegraded("MinReadyBatchingFailed", f.calcErr) - } return nil, f.calcErr } if f.batchCtx != nil { @@ -151,41 +160,17 @@ func (f *fakePartitionController) CalculateBatchContext(release *v1beta1.BatchRe func (f *fakePartitionController) Initialize(ctx context.Context, release *v1beta1.BatchRelease) error { f.initCalls++ - if f.initErr != nil { - if f.statusWriter != nil { - f.statusWriter.RecordDegraded("MinReadyInitializeFailed", f.initErr) - } - return f.initErr - } - if f.statusWriter != nil { - f.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyInitialized, "MinReadyInitialized", "MinReadySeconds strategy initialized") - } - return nil + return f.initErr } func (f *fakePartitionController) UpgradeBatch(context.Context, *batchcontext.BatchContext) error { f.upgradeCalls++ - if f.upgradeErr != nil { - if f.statusWriter != nil { - f.statusWriter.RecordDegraded("MinReadyBatchingFailed", f.upgradeErr) - } - return f.upgradeErr - } - return nil + return f.upgradeErr } func (f *fakePartitionController) Finalize(context.Context, *v1beta1.BatchRelease) error { f.finalizeCalls++ - if f.finalizeErr != nil { - if f.statusWriter != nil { - f.statusWriter.RecordDegraded("MinReadyFinalizeFailed", f.finalizeErr) - } - return f.finalizeErr - } - if f.statusWriter != nil { - f.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyFinalized, "MinReadyFinalized", "MinReadySeconds strategy finalized") - } - return nil + return f.finalizeErr } func (f *fakePartitionController) IsMinReadyControl() bool { diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_batch_context.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_batch_context.go index d0540b8f..4735b175 100644 --- 
a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_batch_context.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_batch_context.go @@ -40,7 +40,7 @@ func (mc *MinReadyControl) minReadyUpdatedReadyReplicas(updateRevision string) ( func countUpdatedAvailablePods(pods []*corev1.Pod, updateRevision string, minReadySeconds int32, now time.Time) int32 { return int32(util.WrappedPodCount(pods, func(pod *corev1.Pod) bool { - if !pod.DeletionTimestamp.IsZero() { + if !util.IsPodActive(pod) { return false } if !util.IsConsistentWithRevision(pod.Labels, updateRevision) { diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go index de1118c7..270d9b55 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go @@ -79,6 +79,18 @@ func (mc *MinReadyControl) RecordBatchReady() { } } +func (mc *MinReadyControl) RecordInitialized() { + if mc.statusWriter != nil { + mc.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyInitialized, "MinReadyInitialized", "MinReadySeconds strategy initialized") + } +} + +func (mc *MinReadyControl) RecordFinalized() { + if mc.statusWriter != nil { + mc.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyFinalized, "MinReadyFinalized", "MinReadySeconds strategy finalized") + } +} + func (mc *MinReadyControl) ObserveBatchWait() { if mc.statusWriter == nil { return @@ -108,89 +120,116 @@ func (mc *MinReadyControl) BuildController() (partitionstyle.Interface, error) { func (mc *MinReadyControl) Initialize(ctx context.Context, release *v1beta1.BatchRelease) error { if release == nil { - err := fmt.Errorf("MinReadyControl.Initialize: release is nil") - mc.RecordOperationFailed("MinReadyInitializeFailed", err) - return err + return 
fmt.Errorf("MinReadyControl.Initialize: release is nil") } if err := mc.ensureInitializeAllowed(); err != nil { - wrapped := fmt.Errorf("MinReadyControl.Initialize: %w", err) - mc.RecordOperationFailed("MinReadyInitializeFailed", wrapped) - return wrapped + return fmt.Errorf("MinReadyControl.Initialize: %w", err) } original := mc.object modified := mc.object.DeepCopy() if err := prepareOriginalAnnotations(original, modified); err != nil { - wrapped := fmt.Errorf("MinReadyControl.Initialize: %w", err) - mc.RecordOperationFailed("MinReadyInitializeFailed", wrapped) - return wrapped + return fmt.Errorf("MinReadyControl.Initialize: %w", err) } modified.Annotations[util.BatchReleaseControlAnnotation] = util.DumpJSON(metav1.NewControllerRef( release, release.GetObjectKind().GroupVersionKind())) inflateDeploymentStrategy(modified) patch := client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{}) if err := mc.client.Patch(ctx, modified, patch); err != nil { - wrapped := fmt.Errorf("MinReadyControl.Initialize: %w", err) - mc.RecordOperationFailed("MinReadyInitializeFailed", wrapped) - return wrapped - } - if mc.statusWriter != nil { - mc.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyInitialized, "MinReadyInitialized", "MinReadySeconds strategy initialized") + return fmt.Errorf("MinReadyControl.Initialize: %w", err) } return nil } func (mc *MinReadyControl) UpgradeBatch(ctx context.Context, batchContext *batchcontext.BatchContext) error { if err := mc.ensureInflatedDeploymentStrategy(ctx); err != nil { - wrapped := fmt.Errorf("MinReadyControl.UpgradeBatch[%d]: %w", batchContext.CurrentBatch, err) - mc.RecordOperationFailed("MinReadyBatchingFailed", wrapped) - return wrapped + return fmt.Errorf("MinReadyControl.UpgradeBatch[%d]: %w", batchContext.CurrentBatch, err) } return mc.reconcileMaxUnavailable(ctx, batchContext) } func (mc *MinReadyControl) ReconcileMaxUnavailableDrift(ctx context.Context, batchContext *batchcontext.BatchContext) error { 
if err := mc.ensureInflatedDeploymentStrategy(ctx); err != nil { - wrapped := fmt.Errorf("MinReadyControl.ReconcileMaxUnavailableDrift[%d]: %w", batchContext.CurrentBatch, err) - mc.RecordOperationFailed("MinReadyBatchingFailed", wrapped) - return wrapped + return fmt.Errorf("MinReadyControl.ReconcileMaxUnavailableDrift[%d]: %w", batchContext.CurrentBatch, err) } return mc.reconcileMaxUnavailable(ctx, batchContext) } func (mc *MinReadyControl) reconcileMaxUnavailable(ctx context.Context, batchContext *batchcontext.BatchContext) error { if err := mc.refreshDeployment(ctx); err != nil { - wrapped := fmt.Errorf("MinReadyControl.reconcileMaxUnavailable[%d]: %w", batchContext.CurrentBatch, err) - mc.RecordOperationFailed("MinReadyBatchingFailed", wrapped) - return wrapped + return fmt.Errorf("MinReadyControl.reconcileMaxUnavailable[%d]: %w", batchContext.CurrentBatch, err) } current, err := intstr.GetScaledValueFromIntOrPercent( mc.object.Spec.Strategy.RollingUpdate.MaxUnavailable, int(batchContext.Replicas), true) if err != nil { - wrapped := fmt.Errorf("MinReadyControl.reconcileMaxUnavailable[%d]: %w", batchContext.CurrentBatch, err) - mc.RecordOperationFailed("MinReadyBatchingFailed", wrapped) - return wrapped + return fmt.Errorf("MinReadyControl.reconcileMaxUnavailable[%d]: %w", batchContext.CurrentBatch, err) } target := batchContext.DesiredUpdatedReplicas - if int32(current) == target { - return nil - } - if int32(current) > target { - // maxUnavailable above the batch target is a legal state after a - // scale-down (HPA or manual) and also self-heals external tampering; - // converge it back to the target instead of reporting degraded drift. + + // At or above the batch target there is nothing to advance. When current + // exceeds the target (HPA scale-down or external tampering) converge it + // back down so the native controller never holds a wider budget than this + // batch needs. 
+ if int32(current) >= target { + if int32(current) == target { + return nil + } warningS(nil, "MinReady maxUnavailable exceeds target, reducing", "batch", batchContext.CurrentBatch, "deployment", klog.KObj(mc.object), "maxUnavailable", current, "target", target) + return mc.patchMaxUnavailable(ctx, int(target)) } + + // Sliding window (P0-3): advance maxUnavailable by the user's original + // budget one step at a time, waiting for the current window's pods to + // become available before widening the budget again. Without this, a large + // batch target (e.g. 99 after a 1-pod canary) is written in a single patch + // and the native controller tears down far more pods than the user's + // declared maxUnavailable in one shot, breaking the anti-disturbance safety + // the batched release is supposed to provide. + step, err := mc.maxUnavailableStep(batchContext.Replicas) + if err != nil { + return fmt.Errorf("MinReadyControl.reconcileMaxUnavailable[%d]: %w", batchContext.CurrentBatch, err) + } + if step <= 0 { + // maxUnavailable=0 means the user relies on maxSurge for concurrency + // control; there is no budget to slide, so drive the batch directly. + return mc.patchMaxUnavailable(ctx, int(target)) + } + if batchContext.UpdatedReadyReplicas < int32(current) { + // current window not yet filled; keep the budget and wait for readiness + return nil + } + next := current + step + if int32(next) > target { + next = int(target) + } + return mc.patchMaxUnavailable(ctx, next) +} + +// maxUnavailableStep returns the user's original maxUnavailable scaled to the +// replica count; the sliding window uses it as the advancement stride. 
+func (mc *MinReadyControl) maxUnavailableStep(replicas int32) (int, error) { + original, err := parseOriginalDeploymentStrategy(mc.object.Annotations) + if err != nil { + return 0, err + } + step := intstr.FromString(DefaultMaxUnavailable) + if original.maxUnavailable != nil { + step = *original.maxUnavailable + } + return intstr.GetScaledValueFromIntOrPercent(&step, int(replicas), true) +} + +// patchMaxUnavailable writes the given integer maxUnavailable back to the +// Deployment with an optimistic-lock patch and refreshes the cached object. +func (mc *MinReadyControl) patchMaxUnavailable(ctx context.Context, value int) error { original := mc.object modified := mc.object.DeepCopy() - maxUnavailable := intstr.FromInt(int(target)) + maxUnavailable := intstr.FromInt(value) modified.Spec.Strategy.RollingUpdate.MaxUnavailable = &maxUnavailable patch := client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{}) if err := mc.client.Patch(ctx, modified, patch); err != nil { - wrapped := fmt.Errorf("MinReadyControl.reconcileMaxUnavailable[%d]: %w", batchContext.CurrentBatch, err) - mc.RecordOperationFailed("MinReadyBatchingFailed", wrapped) - return wrapped + return fmt.Errorf("MinReadyControl.reconcileMaxUnavailable: %w", err) } mc.object = modified return nil @@ -215,19 +254,15 @@ func (mc *MinReadyControl) Finalize(ctx context.Context, _ *v1beta1.BatchRelease } if !hasAnyOriginalAnnotation(mc.object.Annotations) { if hasInflatedDeploymentFields(mc.object) { - err := fmt.Errorf("MinReadyControl.Finalize: annotation state missing while deployment fields are still inflated: %w", + return fmt.Errorf("MinReadyControl.Finalize: annotation state missing while deployment fields are still inflated: %w", partitionstyle.ErrMinReadyAnnotationInvalid) - mc.RecordOperationFailed("MinReadyFinalizeFailed", err) - return err } return nil } original := mc.object restored, err := parseOriginalDeploymentStrategy(original.Annotations) if err != nil { - wrapped := 
fmt.Errorf("MinReadyControl.Finalize: %w", err) - mc.RecordOperationFailed("MinReadyFinalizeFailed", wrapped) - return wrapped + return fmt.Errorf("MinReadyControl.Finalize: %w", err) } modified := mc.object.DeepCopy() applyOriginalDeploymentStrategy(modified, restored) @@ -238,12 +273,7 @@ func (mc *MinReadyControl) Finalize(ctx context.Context, _ *v1beta1.BatchRelease delete(modified.Labels, v1alpha1.DeploymentStableRevisionLabel) patch := client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{}) if err := mc.client.Patch(ctx, modified, patch); err != nil { - wrapped := fmt.Errorf("MinReadyControl.Finalize: %w", err) - mc.RecordOperationFailed("MinReadyFinalizeFailed", wrapped) - return wrapped - } - if mc.statusWriter != nil { - mc.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyFinalized, "MinReadyFinalized", "MinReadySeconds strategy finalized") + return fmt.Errorf("MinReadyControl.Finalize: %w", err) } return nil } @@ -252,9 +282,7 @@ func (mc *MinReadyControl) CalculateBatchContext(release *v1beta1.BatchRelease) rolloutID := release.Spec.ReleasePlan.RolloutID if rolloutID != "" { if _, err := mc.ListOwnedPods(); err != nil { - wrapped := fmt.Errorf("MinReadyControl.CalculateBatchContext: %w", err) - mc.RecordOperationFailed("MinReadyBatchingFailed", wrapped) - return nil, wrapped + return nil, fmt.Errorf("MinReadyControl.CalculateBatchContext: %w", err) } } @@ -262,15 +290,11 @@ func (mc *MinReadyControl) CalculateBatchContext(release *v1beta1.BatchRelease) desiredPartition := release.Spec.ReleasePlan.Batches[currentBatch].CanaryReplicas desiredUpdatedReplicas, err := minReadyDesiredUpdatedReplicas(desiredPartition, mc.object) if err != nil { - wrapped := fmt.Errorf("MinReadyControl.CalculateBatchContext: %w", err) - mc.RecordOperationFailed("MinReadyBatchingFailed", wrapped) - return nil, wrapped + return nil, fmt.Errorf("MinReadyControl.CalculateBatchContext: %w", err) } updatedReadyReplicas, err := 
mc.minReadyUpdatedReadyReplicas(release.Status.UpdateRevision) if err != nil { - wrapped := fmt.Errorf("MinReadyControl.CalculateBatchContext: %w", err) - mc.RecordOperationFailed("MinReadyBatchingFailed", wrapped) - return nil, wrapped + return nil, fmt.Errorf("MinReadyControl.CalculateBatchContext: %w", err) } return &batchcontext.BatchContext{ RolloutID: rolloutID, diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go index 6ebaabd0..439401c6 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go @@ -185,8 +185,10 @@ func TestMinReadyUpgradeBatchUpdatesMaxUnavailableOnly(t *testing.T) { } got := fetchMinReadyDeployment(t, control) - if unavailable := got.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.IntVal != 5 { - t.Fatalf("maxUnavailable = %v, want 5", unavailable) + // Sliding window (P0-3): UpgradeBatch advances maxUnavailable one step + // (original 25% of 10 = 3) toward the batch target 5, not straight to 5. 
+ if unavailable := got.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.IntVal != 3 { + t.Fatalf("maxUnavailable = %v, want 3 (first sliding-window step)", unavailable) } if got.Spec.Strategy.Type != apps.RollingUpdateDeploymentStrategyType { t.Fatalf("strategy.type = %q, want RollingUpdate", got.Spec.Strategy.Type) @@ -270,8 +272,10 @@ func TestMinReadyUpgradeBatchRestoresInflatedStrategyFields(t *testing.T) { if got.Spec.Strategy.RollingUpdate == nil { t.Fatalf("rollingUpdate is nil, want restored strategy") } - if unavailable := got.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.IntVal != 5 { - t.Fatalf("maxUnavailable = %v, want 5", unavailable) + // Sliding window (P0-3): after re-inflation maxUnavailable starts at 0, so + // UpgradeBatch advances it one step (25% of 10 = 3) toward target 5. + if unavailable := got.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.IntVal != 3 { + t.Fatalf("maxUnavailable = %v, want 3 (first sliding-window step)", unavailable) } } @@ -579,3 +583,88 @@ func TestMinReadyFinalizeRestoresAfterGateDisabled(t *testing.T) { t.Fatalf("original annotations not cleaned up: %v", got.Annotations) } } + +func TestMinReadySlidingWindowAdvancesStepByStep(t *testing.T) { + // P0-3: a large batch target must not be written to maxUnavailable in a + // single patch. reconcileMaxUnavailable advances by the user's original + // maxUnavailable (25% of 10 = 3) one step at a time, and only widens the + // budget once the current window's pods are available. 
+ _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newInflatedMinReadyDeployment() + addMinReadyOriginalAnnotations(deployment) + control := newBuiltMinReadyControl(t, deployment) + ctx := &batchcontext.BatchContext{ + CurrentBatch: 1, + Replicas: 10, + DesiredUpdatedReplicas: 9, + } + + steps := []struct { + ready int32 + wantMU int + comment string + }{ + {0, 3, "empty window advances to first step"}, + {1, 3, "window not filled holds budget"}, + {3, 6, "filled window advances one step"}, + {6, 9, "advance caps at target"}, + {9, 9, "at target holds"}, + } + for i, s := range steps { + ctx.UpdatedReadyReplicas = s.ready + if err := control.ReconcileMaxUnavailableDrift(context.Background(), ctx); err != nil { + t.Fatalf("step %d (%s): %v", i, s.comment, err) + } + if v := minReadyMaxUnavailableValue(t, fetchMinReadyDeployment(t, control), 10); v != s.wantMU { + t.Fatalf("step %d (%s): maxUnavailable = %d, want %d", i, s.comment, v, s.wantMU) + } + } +} + +func TestMinReadySlidingWindowReachesSmallTargetInOneStep(t *testing.T) { + // P0-3: when the batch target is within one step (target <= step), the + // first advance is capped at the target, so small batches complete in one + // reconcile instead of overshooting. 
+ _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newInflatedMinReadyDeployment() + addMinReadyOriginalAnnotations(deployment) + control := newBuiltMinReadyControl(t, deployment) + ctx := &batchcontext.BatchContext{ + CurrentBatch: 1, + Replicas: 10, + DesiredUpdatedReplicas: 2, + UpdatedReadyReplicas: 0, + } + if err := control.ReconcileMaxUnavailableDrift(context.Background(), ctx); err != nil { + t.Fatalf("drift reconcile failed: %v", err) + } + if v := minReadyMaxUnavailableValue(t, fetchMinReadyDeployment(t, control), 10); v != 2 { + t.Fatalf("maxUnavailable = %d, want 2 (small target reached in one step)", v) + } +} + +func TestMinReadySlidingWindowStepZeroDrivesBatchDirectly(t *testing.T) { + // P0-3: original maxUnavailable=0 means the user relies on maxSurge for + // concurrency control, so there is no budget to slide; the batch target is + // driven directly to preserve the existing surge-gated behavior. + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newInflatedMinReadyDeployment() + deployment.Annotations = map[string]string{ + AnnotationOriginalMinReadySeconds: "7", + AnnotationOriginalProgressDeadlineSeconds: "60", + AnnotationOriginalMaxUnavailable: "0", + } + control := newBuiltMinReadyControl(t, deployment) + ctx := &batchcontext.BatchContext{ + CurrentBatch: 1, + Replicas: 10, + DesiredUpdatedReplicas: 5, + UpdatedReadyReplicas: 0, + } + if err := control.ReconcileMaxUnavailableDrift(context.Background(), ctx); err != nil { + t.Fatalf("drift reconcile failed: %v", err) + } + if v := minReadyMaxUnavailableValue(t, fetchMinReadyDeployment(t, control), 10); v != 5 { + t.Fatalf("maxUnavailable = %d, want 5 (step=0 drives batch directly)", v) + } +} diff --git a/pkg/controller/batchrelease/control/partitionstyle/minready_status.go b/pkg/controller/batchrelease/control/partitionstyle/minready_status.go index 
15cc7c0d..b56a9890 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/minready_status.go +++ b/pkg/controller/batchrelease/control/partitionstyle/minready_status.go @@ -45,6 +45,8 @@ type MinReadyLifecycle interface { RecordBatchReady() ObserveBatchWait() RecordOperationFailed(reason string, err error) + RecordInitialized() + RecordFinalized() } // MinReadyDriftReconciler converges inflated maxUnavailable back to the active diff --git a/pkg/util/pod_utils.go b/pkg/util/pod_utils.go index 93aad6d0..043d6755 100644 --- a/pkg/util/pod_utils.go +++ b/pkg/util/pod_utils.go @@ -112,6 +112,15 @@ func IsCompletedPod(pod *v1.Pod) bool { return pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded } +// IsPodActive returns true if a pod is not in a terminal phase and has not been +// marked for deletion. This mirrors the upstream Kubernetes IsPodActive check in +// pkg/controller/controller_utils.go. +func IsPodActive(pod *v1.Pod) bool { + return pod.Status.Phase != v1.PodSucceeded && + pod.Status.Phase != v1.PodFailed && + pod.DeletionTimestamp == nil +} + // ListOwnedPods will list all pods belong to workload, including terminating pods func ListOwnedPods(c client.Client, workload client.Object) ([]*v1.Pod, error) { selector, err := getSelector(workload) From 6661336d16f6f55f225845839c73af80d8e33652 Mon Sep 17 00:00:00 2001 From: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> Date: Wed, 24 Jun 2026 11:29:12 +0800 Subject: [PATCH 20/22] test: update integration assertion for P0-3 sliding window (want 10 to 5) TestDeploymentMinReadyConcurrentScaleUsesLatestReplicas expected the full batch target (10) in one patch. After P0-3, UpgradeBatch advances maxUnavailable one sliding-window step at a time (25% of 20 replicas = 5). Update the assertion and add a comment explaining the sliding-window first-step semantics. 
Signed-off-by: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> --- test/integration/concurrency_test.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/test/integration/concurrency_test.go b/test/integration/concurrency_test.go index 833a4ded..7e680a4c 100644 --- a/test/integration/concurrency_test.go +++ b/test/integration/concurrency_test.go @@ -52,8 +52,15 @@ func TestDeploymentMinReadyConcurrentScaleUsesLatestReplicas(t *testing.T) { } got := fetchIntegrationDeployment(t, cli, deployment) - if unavailable := got.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.IntVal != 10 { - t.Fatalf("maxUnavailable = %v, want 10 after scale to 20 replicas", unavailable) + // P0-3 sliding window: UpgradeBatch advances maxUnavailable one step at a + // time using the original budget (25%), not the full batch target in one + // patch. First step = 25% * 20 = 5 (computed from the scaled-up replica + // count, not the pre-scale 10 which would give 25% * 10 = 2); the batch + // target 50% * 20 = 10 is reached over subsequent reconciles as the window + // fills. This still asserts the "uses latest replicas" intent: the step is + // derived from the current replica count (20), not a stale one. 
+ if unavailable := got.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.IntVal != 5 { + t.Fatalf("maxUnavailable = %v, want 5 (sliding-window first step = 25%% of 20 replicas) after scale to 20 replicas", unavailable) } assertIntegrationCondition(t, status, v1beta1.RolloutConditionMinReadyBatching, corev1.ConditionTrue, "MinReadyBatching") } From f7168a002d620d2dc437f4764774e601054ede01 Mon Sep 17 00:00:00 2001 From: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> Date: Sun, 28 Jun 2026 00:21:48 +0800 Subject: [PATCH 21/22] fix: address MinReady review feedback Signed-off-by: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> --- api/v1alpha1/deployment_types.go | 2 +- api/v1alpha1/deployment_types_test.go | 35 +++ api/v1beta1/deployment_types.go | 7 +- api/v1beta1/deployment_types_test.go | 35 +++ ...t-minready-seconds-progressive-delivery.md | 6 +- .../control/partitionstyle/control_plane.go | 64 ++++-- .../partitionstyle/control_plane_test.go | 39 +++- .../deployment/minready_batch_context.go | 7 +- .../deployment/minready_constants.go | 8 +- .../deployment/minready_control.go | 90 ++------ .../deployment/minready_control_test.go | 67 +----- .../partitionstyle/deployment/minready_log.go | 42 ---- .../workload/mutating/minready_deployment.go | 216 ++++++++++++++++++ .../mutating/minready_deployment_test.go | 135 +++++++++++ .../mutating/workload_update_handler.go | 24 +- .../mutating/workload_update_handler_test.go | 45 ++-- 16 files changed, 568 insertions(+), 254 deletions(-) create mode 100644 api/v1alpha1/deployment_types_test.go create mode 100644 api/v1beta1/deployment_types_test.go delete mode 100644 pkg/controller/batchrelease/control/partitionstyle/deployment/minready_log.go create mode 100644 pkg/webhook/workload/mutating/minready_deployment.go create mode 100644 pkg/webhook/workload/mutating/minready_deployment_test.go diff --git a/api/v1alpha1/deployment_types.go b/api/v1alpha1/deployment_types.go index 
98fc474c..d7a39786 100644 --- a/api/v1alpha1/deployment_types.go +++ b/api/v1alpha1/deployment_types.go @@ -90,7 +90,7 @@ func SetDefaultDeploymentStrategy(strategy *DeploymentStrategy) { if strategy.RollingUpdate.MaxSurge == nil { // Set MaxSurge as 25% by default maxSurge := intstr.FromString("25%") - strategy.RollingUpdate.MaxUnavailable = &maxSurge + strategy.RollingUpdate.MaxSurge = &maxSurge } // Cannot allow maxSurge==0 && MaxUnavailable==0, otherwise, no pod can be updated when rolling update. diff --git a/api/v1alpha1/deployment_types_test.go b/api/v1alpha1/deployment_types_test.go new file mode 100644 index 00000000..8476ede7 --- /dev/null +++ b/api/v1alpha1/deployment_types_test.go @@ -0,0 +1,35 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package v1alpha1 + +import "testing" + +func TestSetDefaultDeploymentStrategyDefaultsMaxSurge(t *testing.T) { + strategy := &DeploymentStrategy{RollingStyle: PartitionRollingStyle} + + SetDefaultDeploymentStrategy(strategy) + + if strategy.RollingUpdate == nil { + t.Fatalf("rollingUpdate = nil, want defaulted") + } + if strategy.RollingUpdate.MaxSurge == nil || strategy.RollingUpdate.MaxSurge.StrVal != "25%" { + t.Fatalf("maxSurge = %v, want 25%%", strategy.RollingUpdate.MaxSurge) + } + if strategy.RollingUpdate.MaxUnavailable == nil || strategy.RollingUpdate.MaxUnavailable.StrVal != "25%" { + t.Fatalf("maxUnavailable = %v, want 25%%", strategy.RollingUpdate.MaxUnavailable) + } +} diff --git a/api/v1beta1/deployment_types.go b/api/v1beta1/deployment_types.go index 2b3beb9b..338beeeb 100644 --- a/api/v1beta1/deployment_types.go +++ b/api/v1beta1/deployment_types.go @@ -48,6 +48,11 @@ const ( MaxProgressSeconds = 1<<31 - 1 MaxReadySeconds = MaxProgressSeconds - 1 + // MinReady default values mirror Kubernetes Deployment defaults for fields + // snapshotted before the MinReadySeconds strategy inflates them. + MinReadyDefaultProgressDeadlineSeconds int32 = 600 + MinReadyDefaultMaxUnavailable = "25%" + // MinReadyOriginal*Annotation snapshot the user-specified Deployment strategy // fields before the MinReadySeconds strategy inflates them; they are used to // restore the Deployment on finalize. A Deployment carrying any of them is @@ -129,7 +134,7 @@ func SetDefaultDeploymentStrategy(strategy *DeploymentStrategy) { if strategy.RollingUpdate.MaxSurge == nil { // Set MaxSurge as 25% by default maxSurge := intstr.FromString("25%") - strategy.RollingUpdate.MaxUnavailable = &maxSurge + strategy.RollingUpdate.MaxSurge = &maxSurge } // Cannot allow maxSurge==0 && MaxUnavailable==0, otherwise, no pod can be updated when rolling update. 
diff --git a/api/v1beta1/deployment_types_test.go b/api/v1beta1/deployment_types_test.go new file mode 100644 index 00000000..ed79a6d5 --- /dev/null +++ b/api/v1beta1/deployment_types_test.go @@ -0,0 +1,35 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1beta1 + +import "testing" + +func TestSetDefaultDeploymentStrategyDefaultsMaxSurge(t *testing.T) { + strategy := &DeploymentStrategy{RollingStyle: PartitionRollingStyle} + + SetDefaultDeploymentStrategy(strategy) + + if strategy.RollingUpdate == nil { + t.Fatalf("rollingUpdate = nil, want defaulted") + } + if strategy.RollingUpdate.MaxSurge == nil || strategy.RollingUpdate.MaxSurge.StrVal != "25%" { + t.Fatalf("maxSurge = %v, want 25%%", strategy.RollingUpdate.MaxSurge) + } + if strategy.RollingUpdate.MaxUnavailable == nil || strategy.RollingUpdate.MaxUnavailable.StrVal != "25%" { + t.Fatalf("maxUnavailable = %v, want 25%%", strategy.RollingUpdate.MaxUnavailable) + } +} diff --git a/docs/proposals/20260315-deployment-minready-seconds-progressive-delivery.md b/docs/proposals/20260315-deployment-minready-seconds-progressive-delivery.md index 6d9f9522..aabf4873 100644 --- a/docs/proposals/20260315-deployment-minready-seconds-progressive-delivery.md +++ b/docs/proposals/20260315-deployment-minready-seconds-progressive-delivery.md @@ -390,7 +390,7 @@ The controller maintains **no in-memory state**. 
After a controller restart or l | Before Initialize | No annotations | Re-run `Initialize` (idempotent). | | After Initialize | Four annotations present + fields inflated | Skip `Initialize`, proceed to `UpgradeBatch`. | | Mid-UpgradeBatch, `maxUnavailable` already at target | Annotations present, `maxUnavailable >= target` | Proceed to batch-ready check. | -| Mid-UpgradeBatch, `maxUnavailable` not yet at target | Annotations present, `maxUnavailable < target` | Re-issue the `UpgradeBatch` Patch. | +| Mid-UpgradeBatch, `maxUnavailable` not yet at target | Annotations present, `maxUnavailable < target` | Continue the MinReady maxUnavailable window reconcile on the next `UpgradeBatch` / `EnsureBatchPodsReadyAndLabeled` pass. | | Mid-Finalize, fields restored but annotations remain | Annotations present, fields not inflated | Re-issue the `Finalize` (deletes annotations). | | After Finalize | No annotations | No-op. | @@ -416,7 +416,7 @@ func isMinReadySecondsStrategy(rollout *appsv1beta1.Rollout, deployment *apps.De The guard splits the mutation into two paths. `isMinReadySecondsStrategy` only checks `Canary` because a Rollout cannot declare both `BlueGreen` and `Canary` — the validating webhook rejects that combination. When the feature gate is disabled mid-rollout, the Deployment's `DeploymentStrategyAnnotation` keeps the webhook symmetric with the executor's MinReady annotation fallback. 
-**Enrollment path (workload entering progressing).** Instead of pausing the Deployment, the webhook synchronously snapshots the original strategy fields into annotations and inflates `minReadySeconds` / `progressDeadlineSeconds` / `maxUnavailable` in place via `EnrollMinReadyDeployment`: +**Enrollment path (workload entering progressing).** Instead of pausing the Deployment, the webhook synchronously snapshots the original strategy fields into annotations and inflates `minReadySeconds` / `progressDeadlineSeconds` / `maxUnavailable` in place via the mutating package's local `enrollMinReadyDeployment` helper. This keeps admission code independent from partition-style controller internals: ```go if isMinReadySecondsStrategy(rollout, deployment) { @@ -425,7 +425,7 @@ if isMinReadySecondsStrategy(rollout, deployment) { // observes the user's original budget in the window between admission and // MinReadyControl.Initialize. Continuous releases refresh user-owned // availability annotations before re-inflation. - if err := partitiondeployment.EnrollMinReadyDeployment(newObj); err != nil { + if err := enrollMinReadyDeployment(newObj); err != nil { klog.Warningf("Skip MinReady enrollment for Deployment(%s/%s): %v", ...) 
} } else { diff --git a/pkg/controller/batchrelease/control/partitionstyle/control_plane.go b/pkg/controller/batchrelease/control/partitionstyle/control_plane.go index 6b63624a..e7f9a9f6 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/control_plane.go +++ b/pkg/controller/batchrelease/control/partitionstyle/control_plane.go @@ -85,10 +85,16 @@ func (rc *realBatchControlPlane) reportOperationFailed(controller Interface, rea klog.ErrorS(err, "Partition-style control plane failed", "release", klog.KObj(rc.release), "reason", reason) } -func (rc *realBatchControlPlane) Initialize() error { - controller, err := rc.BuildController() +func (rc *realBatchControlPlane) Initialize() (err error) { + controller := rc.Interface + var reportErr error + defer func() { + rc.reportOperationFailed(controller, "MinReadyInitializeFailed", reportErr) + }() + + controller, err = rc.BuildController() if err != nil { - rc.reportOperationFailed(rc.Interface, "MinReadyInitializeFailed", err) + reportErr = err return err } rc.bindMinReadyStatus(controller) @@ -96,7 +102,7 @@ func (rc *realBatchControlPlane) Initialize() error { // claim workload under our control err = controller.Initialize(rc.ctx, rc.release) if err != nil { - rc.reportOperationFailed(controller, "MinReadyInitializeFailed", err) + reportErr = err return err } if lifecycle, ok := controller.(MinReadyLifecycle); ok { @@ -117,10 +123,16 @@ func (rc *realBatchControlPlane) Initialize() error { return err } -func (rc *realBatchControlPlane) UpgradeBatch() error { - controller, err := rc.BuildController() +func (rc *realBatchControlPlane) UpgradeBatch() (err error) { + controller := rc.Interface + var reportErr error + defer func() { + rc.reportOperationFailed(controller, "MinReadyBatchingFailed", reportErr) + }() + + controller, err = rc.BuildController() if err != nil { - rc.reportOperationFailed(rc.Interface, "MinReadyBatchingFailed", err) + reportErr = err return err } rc.bindMinReadyStatus(controller) @@ 
-134,13 +146,13 @@ func (rc *realBatchControlPlane) UpgradeBatch() error { err = rc.countAndUpdateNoNeedUpdateReplicas() if err != nil { - rc.reportOperationFailed(controller, "MinReadyBatchingFailed", err) + reportErr = err return err } batchContext, err := controller.CalculateBatchContext(rc.release) if err != nil { - rc.reportOperationFailed(controller, "MinReadyBatchingFailed", err) + reportErr = err return err } klog.Infof("BatchRelease %v calculated context when upgrade batch: %s", @@ -148,12 +160,12 @@ func (rc *realBatchControlPlane) UpgradeBatch() error { err = controller.UpgradeBatch(rc.ctx, batchContext) if err != nil { - rc.reportOperationFailed(controller, "MinReadyBatchingFailed", err) + reportErr = err return err } if err := rc.patcher.PatchPodBatchLabel(batchContext); err != nil { - rc.reportOperationFailed(controller, "MinReadyBatchingFailed", err) + reportErr = err return err } if lifecycle, ok := controller.(MinReadyLifecycle); ok { @@ -162,10 +174,16 @@ func (rc *realBatchControlPlane) UpgradeBatch() error { return nil } -func (rc *realBatchControlPlane) EnsureBatchPodsReadyAndLabeled() error { - controller, err := rc.BuildController() +func (rc *realBatchControlPlane) EnsureBatchPodsReadyAndLabeled() (err error) { + controller := rc.Interface + var reportErr error + defer func() { + rc.reportOperationFailed(controller, "MinReadyBatchingFailed", reportErr) + }() + + controller, err = rc.BuildController() if err != nil { - rc.reportOperationFailed(rc.Interface, "MinReadyBatchingFailed", err) + reportErr = err return err } rc.bindMinReadyStatus(controller) @@ -181,7 +199,7 @@ func (rc *realBatchControlPlane) EnsureBatchPodsReadyAndLabeled() error { // the target calculated should be consistent with UpgradeBatch. 
batchContext, err := controller.CalculateBatchContext(rc.release) if err != nil { - rc.reportOperationFailed(controller, "MinReadyBatchingFailed", err) + reportErr = err return err } @@ -190,7 +208,7 @@ func (rc *realBatchControlPlane) EnsureBatchPodsReadyAndLabeled() error { if reconciler, ok := controller.(MinReadyDriftReconciler); ok { if err := reconciler.ReconcileMaxUnavailableDrift(rc.ctx, batchContext); err != nil { - rc.reportOperationFailed(controller, "MinReadyBatchingFailed", err) + reportErr = err return err } } @@ -207,11 +225,17 @@ func (rc *realBatchControlPlane) EnsureBatchPodsReadyAndLabeled() error { return nil } -func (rc *realBatchControlPlane) Finalize() error { - controller, err := rc.BuildController() +func (rc *realBatchControlPlane) Finalize() (err error) { + controller := rc.Interface + var reportErr error + defer func() { + rc.reportOperationFailed(controller, "MinReadyFinalizeFailed", reportErr) + }() + + controller, err = rc.BuildController() if err != nil { if err := client.IgnoreNotFound(err); err != nil { - rc.reportOperationFailed(rc.Interface, "MinReadyFinalizeFailed", err) + reportErr = err return err } return nil @@ -220,7 +244,7 @@ func (rc *realBatchControlPlane) Finalize() error { // release workload control info and clean up resources if it needs if err := controller.Finalize(rc.ctx, rc.release); err != nil { - rc.reportOperationFailed(controller, "MinReadyFinalizeFailed", err) + reportErr = err return err } if lifecycle, ok := controller.(MinReadyLifecycle); ok { diff --git a/pkg/controller/batchrelease/control/partitionstyle/control_plane_test.go b/pkg/controller/batchrelease/control/partitionstyle/control_plane_test.go index 30ece0c4..b81219b0 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/control_plane_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/control_plane_test.go @@ -50,14 +50,16 @@ type fakePartitionController struct { batchCtx *batchcontext.BatchContext calcErr error - initErr 
error - upgradeErr error - finalizeErr error + initErr error + upgradeErr error + finalizeErr error + reconcileErr error buildCalls int initCalls int upgradeCalls int finalizeCalls int + reconcileCalls int calculateCalls int listCalls int @@ -168,6 +170,11 @@ func (f *fakePartitionController) UpgradeBatch(context.Context, *batchcontext.Ba return f.upgradeErr } +func (f *fakePartitionController) ReconcileMaxUnavailableDrift(context.Context, *batchcontext.BatchContext) error { + f.reconcileCalls++ + return f.reconcileErr +} + func (f *fakePartitionController) Finalize(context.Context, *v1beta1.BatchRelease) error { f.finalizeCalls++ return f.finalizeErr @@ -322,6 +329,12 @@ func TestControlPlaneEnsureBatchPodsReadyAndLabeled(t *testing.T) { if controller.calculateCalls != 1 { t.Fatalf("calculateCalls = %d, want 1", controller.calculateCalls) } + if controller.reconcileCalls != 1 { + t.Fatalf("reconcileCalls = %d, want 1", controller.reconcileCalls) + } + if degraded := util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyDegraded); degraded != nil { + t.Fatalf("MinReadyDegraded condition = %#v, want nil for normal batch wait", degraded) + } }) t.Run("ready records batch ready", func(t *testing.T) { @@ -337,6 +350,26 @@ func TestControlPlaneEnsureBatchPodsReadyAndLabeled(t *testing.T) { t.Fatalf("MinReadyBatchReady condition = %#v", condition) } }) + + t.Run("drift reconcile error records degraded condition", func(t *testing.T) { + controller := &fakePartitionController{ + minReady: true, + reconcileErr: errors.Join(errors.New("window drift"), ErrMinReadyDriftDetected), + } + status := &v1beta1.BatchReleaseStatus{} + rc := newTestControlPlane(controller, status) + + if err := rc.EnsureBatchPodsReadyAndLabeled(); err == nil { + t.Fatalf("EnsureBatchPodsReadyAndLabeled() error = nil, want drift error") + } + if controller.reconcileCalls != 1 { + t.Fatalf("reconcileCalls = %d, want 1", controller.reconcileCalls) + } + condition := 
util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyDegraded) + if condition == nil || condition.Reason != "MinReadyDegradedDriftDetected" { + t.Fatalf("MinReadyDegraded condition = %#v", condition) + } + }) } func TestControlPlaneFinalizeMinReadyPaths(t *testing.T) { diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_batch_context.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_batch_context.go index 4735b175..2b019614 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_batch_context.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_batch_context.go @@ -27,15 +27,12 @@ import ( "github.com/openkruise/rollouts/pkg/util" ) -func (mc *MinReadyControl) minReadyUpdatedReadyReplicas(updateRevision string) (int32, error) { +func (mc *MinReadyControl) minReadyUpdatedReadyReplicas(updateRevision string, pods []*corev1.Pod) (int32, error) { original, err := parseOriginalDeploymentStrategy(mc.object.Annotations) if err != nil { return 0, err } - if _, err := mc.ListOwnedPods(); err != nil { - return 0, err - } - return countUpdatedAvailablePods(mc.pods, updateRevision, originalMinReadySeconds(original), time.Now()), nil + return countUpdatedAvailablePods(pods, updateRevision, originalMinReadySeconds(original), time.Now()), nil } func countUpdatedAvailablePods(pods []*corev1.Pod, updateRevision string, minReadySeconds int32, now time.Time) int32 { diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_constants.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_constants.go index 569db504..f9f7d9ee 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_constants.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_constants.go @@ -35,8 +35,8 @@ const ( AnnotationOriginalProgressDeadlineSeconds = 
v1beta1.MinReadyOriginalProgressDeadlineSecondsAnnotation AnnotationOriginalMaxUnavailable = v1beta1.MinReadyOriginalMaxUnavailableAnnotation - DefaultProgressDeadlineSeconds int32 = 600 - DefaultMaxUnavailable = "25%" + DefaultProgressDeadlineSeconds int32 = v1beta1.MinReadyDefaultProgressDeadlineSeconds + DefaultMaxUnavailable = v1beta1.MinReadyDefaultMaxUnavailable InflatedMinReadySeconds int32 = v1beta1.MaxReadySeconds InflatedProgressDeadlineSeconds int32 = v1beta1.MaxProgressSeconds @@ -44,9 +44,9 @@ const ( var AllOriginalAnnotations = v1beta1.MinReadyOriginalAnnotations -func serializeOriginalInt32(value *int32) string { +func serializeOriginalInt32(value *int32, defaultValue int32) string { if value == nil { - return strconv.FormatInt(int64(DefaultProgressDeadlineSeconds), 10) + return strconv.FormatInt(int64(defaultValue), 10) } return strconv.FormatInt(int64(*value), 10) } diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go index 270d9b55..ba06ed64 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go @@ -173,19 +173,16 @@ func (mc *MinReadyControl) reconcileMaxUnavailable(ctx context.Context, batchCon if int32(current) == target { return nil } - warningS(nil, "MinReady maxUnavailable exceeds target, reducing", + klog.V(0).InfoS("MinReady maxUnavailable exceeds target, reducing", "batch", batchContext.CurrentBatch, "deployment", klog.KObj(mc.object), "maxUnavailable", current, "target", target) return mc.patchMaxUnavailable(ctx, int(target)) } - // Sliding window (P0-3): advance maxUnavailable by the user's original - // budget one step at a time, waiting for the current window's pods to - // become available before widening the budget again. Without this, a large - // batch target (e.g. 
99 after a 1-pod canary) is written in a single patch - // and the native controller tears down far more pods than the user's - // declared maxUnavailable in one shot, breaking the anti-disturbance safety - // the batched release is supposed to provide. + // Sliding window: keep no more than the user's original maxUnavailable + // budget worth of updated-but-not-ready pods in flight. As each updated pod + // becomes ready, top up the window immediately instead of waiting for the + // whole current window to become ready. step, err := mc.maxUnavailableStep(batchContext.Replicas) if err != nil { return fmt.Errorf("MinReadyControl.reconcileMaxUnavailable[%d]: %w", batchContext.CurrentBatch, err) @@ -195,11 +192,10 @@ func (mc *MinReadyControl) reconcileMaxUnavailable(ctx context.Context, batchCon // control; there is no budget to slide, so drive the batch directly. return mc.patchMaxUnavailable(ctx, int(target)) } - if batchContext.UpdatedReadyReplicas < int32(current) { - // current window not yet filled; keep the budget and wait for readiness + next := int(batchContext.UpdatedReadyReplicas) + step + if next <= current { return nil } - next := current + step if int32(next) > target { next = int(target) } @@ -280,10 +276,9 @@ func (mc *MinReadyControl) Finalize(ctx context.Context, _ *v1beta1.BatchRelease func (mc *MinReadyControl) CalculateBatchContext(release *v1beta1.BatchRelease) (*batchcontext.BatchContext, error) { rolloutID := release.Spec.ReleasePlan.RolloutID - if rolloutID != "" { - if _, err := mc.ListOwnedPods(); err != nil { - return nil, fmt.Errorf("MinReadyControl.CalculateBatchContext: %w", err) - } + pods, err := mc.ListOwnedPods() + if err != nil { + return nil, fmt.Errorf("MinReadyControl.CalculateBatchContext: %w", err) } currentBatch := release.Status.CanaryStatus.CurrentBatch @@ -292,7 +287,7 @@ func (mc *MinReadyControl) CalculateBatchContext(release *v1beta1.BatchRelease) if err != nil { return nil, 
fmt.Errorf("MinReadyControl.CalculateBatchContext: %w", err) } - updatedReadyReplicas, err := mc.minReadyUpdatedReadyReplicas(release.Status.UpdateRevision) + updatedReadyReplicas, err := mc.minReadyUpdatedReadyReplicas(release.Status.UpdateRevision, pods) if err != nil { return nil, fmt.Errorf("MinReadyControl.CalculateBatchContext: %w", err) } @@ -307,7 +302,7 @@ func (mc *MinReadyControl) CalculateBatchContext(release *v1beta1.BatchRelease) DesiredUpdatedReplicas: desiredUpdatedReplicas, DesiredPartition: desiredPartition, FailureThreshold: release.Spec.ReleasePlan.FailureThreshold, - Pods: mc.pods, + Pods: pods, }, nil } @@ -352,8 +347,8 @@ func writeOriginalAvailabilityAnnotations(original, modified *apps.Deployment) { if modified.Annotations == nil { modified.Annotations = map[string]string{} } - modified.Annotations[AnnotationOriginalMinReadySeconds] = serializeOriginalInt32(&original.Spec.MinReadySeconds) - modified.Annotations[AnnotationOriginalProgressDeadlineSeconds] = serializeOriginalInt32(original.Spec.ProgressDeadlineSeconds) + modified.Annotations[AnnotationOriginalMinReadySeconds] = serializeOriginalInt32(&original.Spec.MinReadySeconds, 0) + modified.Annotations[AnnotationOriginalProgressDeadlineSeconds] = serializeOriginalInt32(original.Spec.ProgressDeadlineSeconds, DefaultProgressDeadlineSeconds) } func originalMaxUnavailable(deployment *apps.Deployment) *intstr.IntOrString { @@ -377,45 +372,6 @@ func inflateDeploymentStrategy(deployment *apps.Deployment) { deployment.Spec.Strategy.RollingUpdate.MaxUnavailable = &maxUnavailable } -// EnrollMinReadyDeployment snapshots the original strategy fields into -// annotations and inflates them in place. The workload mutating webhook calls -// it when a Deployment enters rollout progressing, so the native controller -// never observes the original maxUnavailable/minReadySeconds budget between -// admission and MinReadyControl.Initialize. 
If a continuous release updates the -// user-owned availability fields while MinReady annotations already exist, -// enrollment refreshes those original annotations before re-inflating. -func EnrollMinReadyDeployment(deployment *apps.Deployment) error { - if err := validateDeploymentStrategyType(deployment); err != nil { - return err - } - snapshot := deployment.DeepCopy() - if err := enrollOriginalAnnotations(snapshot, deployment); err != nil { - return err - } - inflateDeploymentStrategy(deployment) - return nil -} - -func enrollOriginalAnnotations(snapshot, target *apps.Deployment) error { - if !hasAnyOriginalAnnotation(snapshot.Annotations) { - writeOriginalAnnotations(snapshot, target) - return nil - } - if err := ensureOriginalAnnotations(snapshot); err != nil { - return err - } - if err := validateInflatedDeploymentStrategy(snapshot); err != nil { - if !hasOriginalAvailabilityChange(snapshot) { - return err - } - if err := validateMinReadyRefreshableDeployment(snapshot); err != nil { - return err - } - writeOriginalAvailabilityAnnotations(snapshot, target) - } - return nil -} - func (mc *MinReadyControl) ensureInflatedDeploymentStrategy(ctx context.Context) error { if err := validateDeploymentStrategyType(mc.object); err != nil { return err @@ -457,24 +413,6 @@ func validateInflatedDeploymentStrategy(deployment *apps.Deployment) error { return nil } -func hasOriginalAvailabilityChange(deployment *apps.Deployment) bool { - if deployment.Spec.MinReadySeconds != InflatedMinReadySeconds { - return true - } - return deployment.Spec.ProgressDeadlineSeconds == nil || - *deployment.Spec.ProgressDeadlineSeconds != InflatedProgressDeadlineSeconds -} - -func validateMinReadyRefreshableDeployment(deployment *apps.Deployment) error { - if deployment.Spec.Paused { - return fmt.Errorf("%w: deployment is paused", partitionstyle.ErrMinReadyDriftDetected) - } - if deployment.Spec.Strategy.RollingUpdate == nil { - return fmt.Errorf("%w: rollingUpdate is nil", 
partitionstyle.ErrMinReadyDriftDetected) - } - return nil -} - func validateDeploymentStrategyType(deployment *apps.Deployment) error { if deployment.Spec.Strategy.Type != apps.RollingUpdateDeploymentStrategyType { return fmt.Errorf("%w: deployment strategy type %s is not RollingUpdate", diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go index 439401c6..66761a0a 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go @@ -126,6 +126,7 @@ func TestMinReadyInitializeRejectsEmptyOriginalAnnotations(t *testing.T) { func TestMinReadyInitializeSerializesKubernetesDefaults(t *testing.T) { _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") deployment := newMinReadyDeployment() + deployment.Spec.MinReadySeconds = 0 deployment.Spec.ProgressDeadlineSeconds = nil deployment.Spec.Strategy.RollingUpdate = nil control := newBuiltMinReadyControl(t, deployment) @@ -135,6 +136,7 @@ func TestMinReadyInitializeSerializesKubernetesDefaults(t *testing.T) { } got := fetchMinReadyDeployment(t, control) + assertAnnotation(t, got.Annotations, AnnotationOriginalMinReadySeconds, "0") assertAnnotation(t, got.Annotations, AnnotationOriginalProgressDeadlineSeconds, "600") assertAnnotation(t, got.Annotations, AnnotationOriginalMaxUnavailable, "25%") assertMinReadyInflatedWithoutSurgeRequirement(t, got) @@ -506,60 +508,6 @@ func TestMinReadyCalculateBatchContextReplicasZero(t *testing.T) { } } -func TestEnrollMinReadyDeploymentSnapshotsAndInflates(t *testing.T) { - // P1-6: enrollment runs at admission time so the native controller never - // observes the user's original budget before Initialize lands. 
- deployment := newMinReadyDeployment() - if err := EnrollMinReadyDeployment(deployment); err != nil { - t.Fatalf("EnrollMinReadyDeployment failed: %v", err) - } - if !hasAnyOriginalAnnotation(deployment.Annotations) { - t.Fatalf("expected original annotations to be written") - } - if deployment.Annotations[AnnotationOriginalMinReadySeconds] != "7" { - t.Fatalf("original min-ready-seconds = %q, want 7", deployment.Annotations[AnnotationOriginalMinReadySeconds]) - } - assertMinReadyInflated(t, deployment) -} - -func TestEnrollMinReadyDeploymentValidatesExistingAnnotations(t *testing.T) { - // When annotations already exist (e.g. a re-admission), enrollment validates - // the inflated state instead of rewriting the snapshot. - deployment := newInflatedMinReadyDeployment() - addMinReadyOriginalAnnotations(deployment) - original := deployment.Annotations[AnnotationOriginalMinReadySeconds] - if err := EnrollMinReadyDeployment(deployment); err != nil { - t.Fatalf("EnrollMinReadyDeployment failed: %v", err) - } - if deployment.Annotations[AnnotationOriginalMinReadySeconds] != original { - t.Fatalf("original annotation was rewritten: %q -> %q", original, deployment.Annotations[AnnotationOriginalMinReadySeconds]) - } -} - -func TestEnrollMinReadyDeploymentRefreshesAvailabilityAnnotationsForContinuousRelease(t *testing.T) { - deployment := newInflatedMinReadyDeployment() - addMinReadyOriginalAnnotations(deployment) - deployment.Spec.MinReadySeconds = 9 - deployment.Spec.ProgressDeadlineSeconds = pointer.Int32(90) - - if err := EnrollMinReadyDeployment(deployment); err != nil { - t.Fatalf("EnrollMinReadyDeployment failed: %v", err) - } - - assertAnnotation(t, deployment.Annotations, AnnotationOriginalMinReadySeconds, "9") - assertAnnotation(t, deployment.Annotations, AnnotationOriginalProgressDeadlineSeconds, "90") - assertAnnotation(t, deployment.Annotations, AnnotationOriginalMaxUnavailable, "25%") - assertMinReadyInflated(t, deployment) -} - -func 
TestEnrollMinReadyDeploymentRejectsRecreate(t *testing.T) { - deployment := newMinReadyDeployment() - deployment.Spec.Strategy.Type = apps.RecreateDeploymentStrategyType - if err := EnrollMinReadyDeployment(deployment); err == nil { - t.Fatalf("EnrollMinReadyDeployment accepted Recreate strategy, want error") - } -} - func TestMinReadyFinalizeRestoresAfterGateDisabled(t *testing.T) { // P1-4: even with the feature gate disabled, a Deployment carrying MinReady // original annotations must finalize cleanly and restore the original fields. @@ -586,9 +534,9 @@ func TestMinReadyFinalizeRestoresAfterGateDisabled(t *testing.T) { func TestMinReadySlidingWindowAdvancesStepByStep(t *testing.T) { // P0-3: a large batch target must not be written to maxUnavailable in a - // single patch. reconcileMaxUnavailable advances by the user's original - // maxUnavailable (25% of 10 = 3) one step at a time, and only widens the - // budget once the current window's pods are available. + // single patch. reconcileMaxUnavailable keeps at most the user's original + // maxUnavailable (25% of 10 = 3) worth of updated-but-not-ready pods in + // flight, topping up the window as individual pods become ready. 
_ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") deployment := newInflatedMinReadyDeployment() addMinReadyOriginalAnnotations(deployment) @@ -605,8 +553,9 @@ func TestMinReadySlidingWindowAdvancesStepByStep(t *testing.T) { comment string }{ {0, 3, "empty window advances to first step"}, - {1, 3, "window not filled holds budget"}, - {3, 6, "filled window advances one step"}, + {1, 4, "one ready pod tops up one slot"}, + {2, 5, "partial readiness keeps topping up"}, + {4, 7, "does not wait for the whole current window"}, {6, 9, "advance caps at target"}, {9, 9, "at target holds"}, } diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_log.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_log.go deleted file mode 100644 index 8f4cdf63..00000000 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_log.go +++ /dev/null @@ -1,42 +0,0 @@ -/* -Copyright 2026 The Kruise Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package deployment - -import ( - "fmt" - "strings" - - "k8s.io/klog/v2" -) - -// warningS logs at warning severity with the same call shape as klog.InfoS/ErrorS. -// klog v2.120.1 does not expose WarningS, so MinReady uses this helper locally. 
-func warningS(err error, msg string, keysAndValues ...interface{}) { - klog.Warning(formatStructuredLog(err, msg, keysAndValues...)) -} - -func formatStructuredLog(err error, msg string, keysAndValues ...interface{}) string { - var b strings.Builder - b.WriteString(msg) - if err != nil { - fmt.Fprintf(&b, " err=%v", err) - } - for i := 0; i+1 < len(keysAndValues); i += 2 { - fmt.Fprintf(&b, " %v=%v", keysAndValues[i], keysAndValues[i+1]) - } - return b.String() -} diff --git a/pkg/webhook/workload/mutating/minready_deployment.go b/pkg/webhook/workload/mutating/minready_deployment.go new file mode 100644 index 00000000..0fb58eae --- /dev/null +++ b/pkg/webhook/workload/mutating/minready_deployment.go @@ -0,0 +1,216 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package mutating + +import ( + "fmt" + "strconv" + "strings" + + apps "k8s.io/api/apps/v1" + "k8s.io/apimachinery/pkg/util/intstr" + + appsv1beta1 "github.com/openkruise/rollouts/api/v1beta1" +) + +const ( + inflatedMinReadySeconds int32 = appsv1beta1.MaxReadySeconds + inflatedProgressDeadlineSeconds int32 = appsv1beta1.MaxProgressSeconds +) + +// enrollMinReadyDeployment snapshots the original strategy fields into +// annotations and inflates them in place. It lives in the webhook package so +// admission code does not depend on controller internals. 
+func enrollMinReadyDeployment(deployment *apps.Deployment) error { + if err := validateMinReadyDeploymentStrategyType(deployment); err != nil { + return err + } + snapshot := deployment.DeepCopy() + if err := enrollMinReadyOriginalAnnotations(snapshot, deployment); err != nil { + return err + } + inflateMinReadyDeploymentStrategy(deployment) + return nil +} + +func enrollMinReadyOriginalAnnotations(snapshot, target *apps.Deployment) error { + if !appsv1beta1.HasMinReadyOriginalAnnotations(snapshot.Annotations) { + writeMinReadyOriginalAnnotations(snapshot, target) + return nil + } + if err := ensureMinReadyOriginalAnnotations(snapshot); err != nil { + return err + } + if err := validateMinReadyInflatedDeploymentStrategy(snapshot); err != nil { + if !hasMinReadyOriginalAvailabilityChange(snapshot) { + return err + } + if err := validateMinReadyRefreshableDeployment(snapshot); err != nil { + return err + } + writeMinReadyOriginalAvailabilityAnnotations(snapshot, target) + } + return nil +} + +func writeMinReadyOriginalAnnotations(original, modified *apps.Deployment) { + writeMinReadyOriginalAvailabilityAnnotations(original, modified) + modified.Annotations[appsv1beta1.MinReadyOriginalMaxUnavailableAnnotation] = + serializeMinReadyOriginalIntOrString(originalMinReadyMaxUnavailable(original)) +} + +func writeMinReadyOriginalAvailabilityAnnotations(original, modified *apps.Deployment) { + if modified.Annotations == nil { + modified.Annotations = map[string]string{} + } + modified.Annotations[appsv1beta1.MinReadyOriginalMinReadySecondsAnnotation] = + serializeMinReadyOriginalInt32(&original.Spec.MinReadySeconds, 0) + modified.Annotations[appsv1beta1.MinReadyOriginalProgressDeadlineSecondsAnnotation] = + serializeMinReadyOriginalInt32(original.Spec.ProgressDeadlineSeconds, appsv1beta1.MinReadyDefaultProgressDeadlineSeconds) +} + +func serializeMinReadyOriginalInt32(value *int32, defaultValue int32) string { + if value == nil { + return 
strconv.FormatInt(int64(defaultValue), 10) + } + return strconv.FormatInt(int64(*value), 10) +} + +func serializeMinReadyOriginalIntOrString(value *intstr.IntOrString) string { + if value == nil { + return appsv1beta1.MinReadyDefaultMaxUnavailable + } + if value.Type == intstr.String { + return value.StrVal + } + return strconv.FormatInt(int64(value.IntVal), 10) +} + +func originalMinReadyMaxUnavailable(deployment *apps.Deployment) *intstr.IntOrString { + if deployment.Spec.Strategy.RollingUpdate == nil { + return nil + } + return deployment.Spec.Strategy.RollingUpdate.MaxUnavailable +} + +func ensureMinReadyOriginalAnnotations(deployment *apps.Deployment) error { + if _, err := parseMinReadyOriginalInt32(deployment.Annotations, appsv1beta1.MinReadyOriginalMinReadySecondsAnnotation); err != nil { + return err + } + if _, err := parseMinReadyOriginalInt32(deployment.Annotations, appsv1beta1.MinReadyOriginalProgressDeadlineSecondsAnnotation); err != nil { + return err + } + if _, err := parseMinReadyOriginalIntOrString(deployment.Annotations, appsv1beta1.MinReadyOriginalMaxUnavailableAnnotation); err != nil { + return err + } + return nil +} + +func parseMinReadyOriginalInt32(annotations map[string]string, key string) (*int32, error) { + raw, ok := annotations[key] + if !ok { + return nil, fmt.Errorf("annotation %s missing", key) + } + if raw == "" { + return nil, fmt.Errorf("annotation %s present but empty", key) + } + n, err := strconv.ParseInt(raw, 10, 32) + if err != nil { + return nil, fmt.Errorf("annotation %s malformed int32: %v", key, err) + } + v := int32(n) + return &v, nil +} + +func parseMinReadyOriginalIntOrString(annotations map[string]string, key string) (*intstr.IntOrString, error) { + raw, ok := annotations[key] + if !ok { + return nil, fmt.Errorf("annotation %s missing", key) + } + if raw == "" { + return nil, fmt.Errorf("annotation %s present but empty", key) + } + if strings.HasSuffix(raw, "%") { + if _, err := strconv.Atoi(strings.TrimSuffix(raw, 
"%")); err != nil { + return nil, fmt.Errorf("annotation %s malformed percent: %v", key, err) + } + v := intstr.FromString(raw) + return &v, nil + } + n, err := strconv.Atoi(raw) + if err != nil { + return nil, fmt.Errorf("annotation %s malformed int: %v", key, err) + } + v := intstr.FromInt(n) + return &v, nil +} + +func inflateMinReadyDeploymentStrategy(deployment *apps.Deployment) { + progressDeadlineSeconds := inflatedProgressDeadlineSeconds + maxUnavailable := intstr.FromInt(0) + deployment.Spec.Paused = false + deployment.Spec.MinReadySeconds = inflatedMinReadySeconds + deployment.Spec.ProgressDeadlineSeconds = &progressDeadlineSeconds + if deployment.Spec.Strategy.RollingUpdate == nil { + deployment.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{} + } + deployment.Spec.Strategy.RollingUpdate.MaxUnavailable = &maxUnavailable +} + +func validateMinReadyInflatedDeploymentStrategy(deployment *apps.Deployment) error { + if err := validateMinReadyDeploymentStrategyType(deployment); err != nil { + return err + } + if deployment.Spec.Paused { + return fmt.Errorf("deployment is paused") + } + if deployment.Spec.MinReadySeconds != inflatedMinReadySeconds { + return fmt.Errorf("minReadySeconds=%d want %d", deployment.Spec.MinReadySeconds, inflatedMinReadySeconds) + } + if deployment.Spec.ProgressDeadlineSeconds == nil || *deployment.Spec.ProgressDeadlineSeconds != inflatedProgressDeadlineSeconds { + return fmt.Errorf("progressDeadlineSeconds=%v want %d", deployment.Spec.ProgressDeadlineSeconds, inflatedProgressDeadlineSeconds) + } + if deployment.Spec.Strategy.RollingUpdate == nil { + return fmt.Errorf("rollingUpdate is nil") + } + return nil +} + +func hasMinReadyOriginalAvailabilityChange(deployment *apps.Deployment) bool { + if deployment.Spec.MinReadySeconds != inflatedMinReadySeconds { + return true + } + return deployment.Spec.ProgressDeadlineSeconds == nil || + *deployment.Spec.ProgressDeadlineSeconds != inflatedProgressDeadlineSeconds +} + +func 
validateMinReadyRefreshableDeployment(deployment *apps.Deployment) error { + if deployment.Spec.Paused { + return fmt.Errorf("deployment is paused") + } + if deployment.Spec.Strategy.RollingUpdate == nil { + return fmt.Errorf("rollingUpdate is nil") + } + return nil +} + +func validateMinReadyDeploymentStrategyType(deployment *apps.Deployment) error { + if deployment.Spec.Strategy.Type != apps.RollingUpdateDeploymentStrategyType { + return fmt.Errorf("deployment strategy type %s is not RollingUpdate", deployment.Spec.Strategy.Type) + } + return nil +} diff --git a/pkg/webhook/workload/mutating/minready_deployment_test.go b/pkg/webhook/workload/mutating/minready_deployment_test.go new file mode 100644 index 00000000..1c8db757 --- /dev/null +++ b/pkg/webhook/workload/mutating/minready_deployment_test.go @@ -0,0 +1,135 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package mutating + +import ( + "testing" + + apps "k8s.io/api/apps/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/utils/pointer" + + appsv1beta1 "github.com/openkruise/rollouts/api/v1beta1" +) + +func TestEnrollMinReadyDeploymentSnapshotsAndInflates(t *testing.T) { + deployment := newWebhookMinReadyDeployment() + + if err := enrollMinReadyDeployment(deployment); err != nil { + t.Fatalf("enrollMinReadyDeployment failed: %v", err) + } + + assertWebhookMinReadyAnnotation(t, deployment, appsv1beta1.MinReadyOriginalMinReadySecondsAnnotation, "7") + assertWebhookMinReadyAnnotation(t, deployment, appsv1beta1.MinReadyOriginalProgressDeadlineSecondsAnnotation, "60") + assertWebhookMinReadyAnnotation(t, deployment, appsv1beta1.MinReadyOriginalMaxUnavailableAnnotation, "25%") + assertWebhookMinReadyInflated(t, deployment) +} + +func TestEnrollMinReadyDeploymentValidatesExistingAnnotations(t *testing.T) { + deployment := newWebhookInflatedMinReadyDeployment() + addWebhookMinReadyOriginalAnnotations(deployment) + original := deployment.Annotations[appsv1beta1.MinReadyOriginalMinReadySecondsAnnotation] + + if err := enrollMinReadyDeployment(deployment); err != nil { + t.Fatalf("enrollMinReadyDeployment failed: %v", err) + } + + if deployment.Annotations[appsv1beta1.MinReadyOriginalMinReadySecondsAnnotation] != original { + t.Fatalf("original annotation was rewritten: %q -> %q", original, deployment.Annotations[appsv1beta1.MinReadyOriginalMinReadySecondsAnnotation]) + } +} + +func TestEnrollMinReadyDeploymentRefreshesAvailabilityAnnotationsForContinuousRelease(t *testing.T) { + deployment := newWebhookInflatedMinReadyDeployment() + addWebhookMinReadyOriginalAnnotations(deployment) + deployment.Spec.MinReadySeconds = 9 + deployment.Spec.ProgressDeadlineSeconds = pointer.Int32(90) + + if err := enrollMinReadyDeployment(deployment); err != nil { + t.Fatalf("enrollMinReadyDeployment failed: %v", err) + } + + 
assertWebhookMinReadyAnnotation(t, deployment, appsv1beta1.MinReadyOriginalMinReadySecondsAnnotation, "9") + assertWebhookMinReadyAnnotation(t, deployment, appsv1beta1.MinReadyOriginalProgressDeadlineSecondsAnnotation, "90") + assertWebhookMinReadyAnnotation(t, deployment, appsv1beta1.MinReadyOriginalMaxUnavailableAnnotation, "25%") + assertWebhookMinReadyInflated(t, deployment) +} + +func TestEnrollMinReadyDeploymentRejectsRecreate(t *testing.T) { + deployment := newWebhookMinReadyDeployment() + deployment.Spec.Strategy.Type = apps.RecreateDeploymentStrategyType + + if err := enrollMinReadyDeployment(deployment); err == nil { + t.Fatalf("enrollMinReadyDeployment accepted Recreate strategy, want error") + } +} + +func newWebhookMinReadyDeployment() *apps.Deployment { + progressDeadline := int32(60) + maxUnavailable := intstr.FromString("25%") + maxSurge := intstr.FromInt(1) + return &apps.Deployment{ + ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{}}, + Spec: apps.DeploymentSpec{ + MinReadySeconds: 7, + ProgressDeadlineSeconds: &progressDeadline, + Strategy: apps.DeploymentStrategy{ + Type: apps.RollingUpdateDeploymentStrategyType, + RollingUpdate: &apps.RollingUpdateDeployment{ + MaxUnavailable: &maxUnavailable, + MaxSurge: &maxSurge, + }, + }, + }, + } +} + +func newWebhookInflatedMinReadyDeployment() *apps.Deployment { + deployment := newWebhookMinReadyDeployment() + inflateMinReadyDeploymentStrategy(deployment) + return deployment +} + +func addWebhookMinReadyOriginalAnnotations(deployment *apps.Deployment) { + if deployment.Annotations == nil { + deployment.Annotations = map[string]string{} + } + deployment.Annotations[appsv1beta1.MinReadyOriginalMinReadySecondsAnnotation] = "7" + deployment.Annotations[appsv1beta1.MinReadyOriginalProgressDeadlineSecondsAnnotation] = "60" + deployment.Annotations[appsv1beta1.MinReadyOriginalMaxUnavailableAnnotation] = "25%" +} + +func assertWebhookMinReadyAnnotation(t *testing.T, deployment *apps.Deployment, 
key, want string) { + t.Helper() + if got := deployment.Annotations[key]; got != want { + t.Fatalf("annotation %s = %q, want %q", key, got, want) + } +} + +func assertWebhookMinReadyInflated(t *testing.T, deployment *apps.Deployment) { + t.Helper() + if deployment.Spec.MinReadySeconds != inflatedMinReadySeconds { + t.Fatalf("minReadySeconds = %d, want %d", deployment.Spec.MinReadySeconds, inflatedMinReadySeconds) + } + if deployment.Spec.ProgressDeadlineSeconds == nil || *deployment.Spec.ProgressDeadlineSeconds != inflatedProgressDeadlineSeconds { + t.Fatalf("progressDeadlineSeconds = %v, want %d", deployment.Spec.ProgressDeadlineSeconds, inflatedProgressDeadlineSeconds) + } + if unavailable := deployment.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.IntVal != 0 { + t.Fatalf("maxUnavailable = %v, want 0", unavailable) + } +} diff --git a/pkg/webhook/workload/mutating/workload_update_handler.go b/pkg/webhook/workload/mutating/workload_update_handler.go index ed39bceb..a1bc2b5c 100644 --- a/pkg/webhook/workload/mutating/workload_update_handler.go +++ b/pkg/webhook/workload/mutating/workload_update_handler.go @@ -41,7 +41,6 @@ import ( appsv1alpha1 "github.com/openkruise/rollouts/api/v1alpha1" appsv1beta1 "github.com/openkruise/rollouts/api/v1beta1" - partitiondeployment "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/deployment" "github.com/openkruise/rollouts/pkg/feature" "github.com/openkruise/rollouts/pkg/util" utilclient "github.com/openkruise/rollouts/pkg/util/client" @@ -243,7 +242,7 @@ func (h *WorkloadHandler) handleDeployment(newObj, oldObj *apps.Deployment) (boo modified := false if isMinReadySecondsStrategy(rollout, newObj) { if isEffectiveDeploymentRevisionChange(oldObj, newObj) { - if err := partitiondeployment.EnrollMinReadyDeployment(newObj); err != nil { + if err := enrollMinReadyDeployment(newObj); err != nil { klog.Warningf("Skip MinReady continuous enrollment for Deployment(%s/%s): 
%v", newObj.Namespace, newObj.Name, err) return enforceMinReadyInflation(newObj), nil } @@ -344,7 +343,7 @@ func (h *WorkloadHandler) handleDeployment(newObj, oldObj *apps.Deployment) (boo // so the native controller never observes the user's original budget in the // window between admission and MinReadyControl.Initialize. Continuous // releases refresh user-owned availability annotations before re-inflation. - if err := partitiondeployment.EnrollMinReadyDeployment(newObj); err != nil { + if err := enrollMinReadyDeployment(newObj); err != nil { // Do not block admission; the controller's Initialize will surface a // degraded condition for an unsupported strategy instead. klog.Warningf("Skip MinReady enrollment for Deployment(%s/%s): %v", newObj.Namespace, newObj.Name, err) @@ -500,7 +499,7 @@ func isMinReadySecondsStrategy(rollout *appsv1beta1.Rollout, deployment *apps.De } func enforceMinReadyInflation(deployment *apps.Deployment) bool { - if !hasMinReadyOriginalAnnotations(deployment.Annotations) { + if !appsv1beta1.HasMinReadyOriginalAnnotations(deployment.Annotations) { return false } modified := false @@ -520,27 +519,18 @@ func enforceMinReadyInflation(deployment *apps.Deployment) bool { deployment.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{} modified = true } - if deployment.Spec.MinReadySeconds != partitiondeployment.InflatedMinReadySeconds { - deployment.Spec.MinReadySeconds = partitiondeployment.InflatedMinReadySeconds + if deployment.Spec.MinReadySeconds != inflatedMinReadySeconds { + deployment.Spec.MinReadySeconds = inflatedMinReadySeconds modified = true } - if deployment.Spec.ProgressDeadlineSeconds == nil || *deployment.Spec.ProgressDeadlineSeconds != partitiondeployment.InflatedProgressDeadlineSeconds { - progressDeadlineSeconds := partitiondeployment.InflatedProgressDeadlineSeconds + if deployment.Spec.ProgressDeadlineSeconds == nil || *deployment.Spec.ProgressDeadlineSeconds != inflatedProgressDeadlineSeconds { + 
progressDeadlineSeconds := inflatedProgressDeadlineSeconds deployment.Spec.ProgressDeadlineSeconds = &progressDeadlineSeconds modified = true } return modified } -func hasMinReadyOriginalAnnotations(annotations map[string]string) bool { - for _, key := range partitiondeployment.AllOriginalAnnotations { - if _, ok := annotations[key]; ok { - return true - } - } - return false -} - func setDeploymentStrategyAnnotation(strategy appsv1alpha1.DeploymentStrategy, d *apps.Deployment) { strategyAnno, _ := json.Marshal(&strategy) d.Annotations[appsv1alpha1.DeploymentStrategyAnnotation] = string(strategyAnno) diff --git a/pkg/webhook/workload/mutating/workload_update_handler_test.go b/pkg/webhook/workload/mutating/workload_update_handler_test.go index 151b8fce..db20a52b 100644 --- a/pkg/webhook/workload/mutating/workload_update_handler_test.go +++ b/pkg/webhook/workload/mutating/workload_update_handler_test.go @@ -44,7 +44,6 @@ import ( rolloutapi "github.com/openkruise/rollouts/api" appsv1alpha1 "github.com/openkruise/rollouts/api/v1alpha1" appsv1beta1 "github.com/openkruise/rollouts/api/v1beta1" - partitiondeployment "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/deployment" "github.com/openkruise/rollouts/pkg/feature" "github.com/openkruise/rollouts/pkg/util" utilfeature "github.com/openkruise/rollouts/pkg/util/feature" @@ -440,12 +439,12 @@ func TestHandlerDeployment(t *testing.T) { // P1-6: enrollment snapshots original strategy fields and inflates // minReadySeconds/progressDeadline/maxUnavailable synchronously so the // native controller never observes the original budget before Initialize. 
- obj.Annotations[partitiondeployment.AnnotationOriginalMinReadySeconds] = "0" - obj.Annotations[partitiondeployment.AnnotationOriginalProgressDeadlineSeconds] = "600" - obj.Annotations[partitiondeployment.AnnotationOriginalMaxUnavailable] = "25%" + obj.Annotations[appsv1beta1.MinReadyOriginalMinReadySecondsAnnotation] = "0" + obj.Annotations[appsv1beta1.MinReadyOriginalProgressDeadlineSecondsAnnotation] = "600" + obj.Annotations[appsv1beta1.MinReadyOriginalMaxUnavailableAnnotation] = "25%" obj.Spec.Paused = false - obj.Spec.MinReadySeconds = partitiondeployment.InflatedMinReadySeconds - pds := partitiondeployment.InflatedProgressDeadlineSeconds + obj.Spec.MinReadySeconds = inflatedMinReadySeconds + pds := inflatedProgressDeadlineSeconds obj.Spec.ProgressDeadlineSeconds = &pds maxUnavailable := intstr.FromInt(0) obj.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{MaxUnavailable: &maxUnavailable} @@ -652,15 +651,15 @@ func TestHandlerDeployment(t *testing.T) { oldObj := deploymentDemo.DeepCopy() oldObj.Spec.Template.Spec.Containers[0].Image = "echoserver:v2" oldObj.Annotations[util.InRolloutProgressingAnnotation] = `{"rolloutName":"rollout-demo","RolloutDone":false}` - oldObj.Annotations[partitiondeployment.AnnotationOriginalMinReadySeconds] = "7" - oldObj.Annotations[partitiondeployment.AnnotationOriginalProgressDeadlineSeconds] = "60" - oldObj.Annotations[partitiondeployment.AnnotationOriginalMaxUnavailable] = "25%" + oldObj.Annotations[appsv1beta1.MinReadyOriginalMinReadySecondsAnnotation] = "7" + oldObj.Annotations[appsv1beta1.MinReadyOriginalProgressDeadlineSecondsAnnotation] = "60" + oldObj.Annotations[appsv1beta1.MinReadyOriginalMaxUnavailableAnnotation] = "25%" oldObj.Spec.Paused = false oldObj.Spec.Strategy.Type = apps.RollingUpdateDeploymentStrategyType maxUnavailable := intstr.FromInt(0) oldObj.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{MaxUnavailable: &maxUnavailable} - oldObj.Spec.MinReadySeconds = 
partitiondeployment.InflatedMinReadySeconds - inflatedPDS := partitiondeployment.InflatedProgressDeadlineSeconds + oldObj.Spec.MinReadySeconds = inflatedMinReadySeconds + inflatedPDS := inflatedProgressDeadlineSeconds oldObj.Spec.ProgressDeadlineSeconds = &inflatedPDS newObj := oldObj.DeepCopy() @@ -673,15 +672,15 @@ func TestHandlerDeployment(t *testing.T) { obj := deploymentDemo.DeepCopy() obj.Spec.Template.Spec.Containers[0].Image = "echoserver:v3" obj.Annotations[util.InRolloutProgressingAnnotation] = `{"rolloutName":"rollout-demo","RolloutDone":false}` - obj.Annotations[partitiondeployment.AnnotationOriginalMinReadySeconds] = "9" - obj.Annotations[partitiondeployment.AnnotationOriginalProgressDeadlineSeconds] = "90" - obj.Annotations[partitiondeployment.AnnotationOriginalMaxUnavailable] = "25%" + obj.Annotations[appsv1beta1.MinReadyOriginalMinReadySecondsAnnotation] = "9" + obj.Annotations[appsv1beta1.MinReadyOriginalProgressDeadlineSecondsAnnotation] = "90" + obj.Annotations[appsv1beta1.MinReadyOriginalMaxUnavailableAnnotation] = "25%" obj.Spec.Paused = false obj.Spec.Strategy.Type = apps.RollingUpdateDeploymentStrategyType maxUnavailable := intstr.FromInt(0) obj.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{MaxUnavailable: &maxUnavailable} - obj.Spec.MinReadySeconds = partitiondeployment.InflatedMinReadySeconds - inflatedPDS := partitiondeployment.InflatedProgressDeadlineSeconds + obj.Spec.MinReadySeconds = inflatedMinReadySeconds + inflatedPDS := inflatedProgressDeadlineSeconds obj.Spec.ProgressDeadlineSeconds = &inflatedPDS return obj }, @@ -960,19 +959,19 @@ func TestIsMinReadySecondsStrategy(t *testing.T) { // inflatedMinReadyDeployment returns a Deployment in a healthy inflated MinReady // state: RollingUpdate, unpaused, with original-strategy annotations present. 
func inflatedMinReadyDeployment() *apps.Deployment { - pds := partitiondeployment.InflatedProgressDeadlineSeconds + pds := inflatedProgressDeadlineSeconds maxUnavailable := intstr.FromInt(0) return &apps.Deployment{ ObjectMeta: metav1.ObjectMeta{ Annotations: map[string]string{ - partitiondeployment.AnnotationOriginalMinReadySeconds: "0", - partitiondeployment.AnnotationOriginalProgressDeadlineSeconds: "600", - partitiondeployment.AnnotationOriginalMaxUnavailable: "25%", + appsv1beta1.MinReadyOriginalMinReadySecondsAnnotation: "0", + appsv1beta1.MinReadyOriginalProgressDeadlineSecondsAnnotation: "600", + appsv1beta1.MinReadyOriginalMaxUnavailableAnnotation: "25%", }, }, Spec: apps.DeploymentSpec{ Paused: false, - MinReadySeconds: partitiondeployment.InflatedMinReadySeconds, + MinReadySeconds: inflatedMinReadySeconds, ProgressDeadlineSeconds: &pds, Strategy: apps.DeploymentStrategy{ Type: apps.RollingUpdateDeploymentStrategyType, @@ -1044,10 +1043,10 @@ func TestEnforceMinReadyInflation(t *testing.T) { if !enforceMinReadyInflation(d) { t.Fatalf("expected modification for deflated fields") } - if d.Spec.MinReadySeconds != partitiondeployment.InflatedMinReadySeconds { + if d.Spec.MinReadySeconds != inflatedMinReadySeconds { t.Fatalf("minReadySeconds not re-inflated: %d", d.Spec.MinReadySeconds) } - if d.Spec.ProgressDeadlineSeconds == nil || *d.Spec.ProgressDeadlineSeconds != partitiondeployment.InflatedProgressDeadlineSeconds { + if d.Spec.ProgressDeadlineSeconds == nil || *d.Spec.ProgressDeadlineSeconds != inflatedProgressDeadlineSeconds { t.Fatalf("progressDeadlineSeconds not re-inflated: %v", d.Spec.ProgressDeadlineSeconds) } }) From 95e8e80a7f52f35405a765641e6be5e2dca5c57f Mon Sep 17 00:00:00 2001 From: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> Date: Sun, 28 Jun 2026 02:23:57 +0800 Subject: [PATCH 22/22] test: stabilize minready TC7 resume observation Signed-off-by: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com> --- 
.../minready/deployment_minready_helpers_test.go | 15 ++++++++++++--- test/e2e/minready/deployment_minready_test.go | 3 ++- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/test/e2e/minready/deployment_minready_helpers_test.go b/test/e2e/minready/deployment_minready_helpers_test.go index 28ecb40f..8332ba41 100644 --- a/test/e2e/minready/deployment_minready_helpers_test.go +++ b/test/e2e/minready/deployment_minready_helpers_test.go @@ -134,6 +134,14 @@ func updateMinReadyE2EDeploymentVersion(namespace, version string) { } func resumeMinReadyE2ERollout(namespace, name string) { + resumedStep := markMinReadyE2ERolloutPausedStepReady(namespace, name) + if resumedStep < 0 { + return + } + waitMinReadyE2ERolloutStepTransitioned(namespace, name, resumedStep) +} + +func markMinReadyE2ERolloutPausedStepReady(namespace, name string) int32 { resumedStep := int32(-1) Eventually(func() bool { rollout := &v1beta1.Rollout{} @@ -149,9 +157,10 @@ func resumeMinReadyE2ERollout(namespace, name string) { body := fmt.Sprintf(`{"status":{"canaryStatus":{"currentStepState":"%s"}}}`, v1beta1.CanaryStepStateReady) return k8sClient.Status().Patch(context.TODO(), rollout, client.RawPatch(types.MergePatchType, []byte(body))) == nil }, 5*time.Minute, time.Second).Should(BeTrue()) - if resumedStep < 0 { - return - } + return resumedStep +} + +func waitMinReadyE2ERolloutStepTransitioned(namespace, name string, resumedStep int32) { Eventually(func() bool { rollout := &v1beta1.Rollout{} key := types.NamespacedName{Namespace: namespace, Name: name} diff --git a/test/e2e/minready/deployment_minready_test.go b/test/e2e/minready/deployment_minready_test.go index b95d8afe..15aecbd6 100644 --- a/test/e2e/minready/deployment_minready_test.go +++ b/test/e2e/minready/deployment_minready_test.go @@ -133,7 +133,8 @@ var _ = SIGDescribe("Deployment MinReadySeconds", func() { patchMinReadyE2EMaxUnavailable(namespace, 5) // Heal drift to the paused step's batch target (20% on 5 replicas => 1). 
expectMinReadyE2EInflatedMaxUnavailable(namespace, 1) - resumeMinReadyE2ERollout(namespace, rollout.Name) + // Observe the next batch target before the rollout can race ahead to a later batch. + markMinReadyE2ERolloutPausedStepReady(namespace, rollout.Name) // 50% batch target is also 3 on 5 replicas; wait for UpgradeBatch, not step 2 pause. waitMinReadyE2EInflatedMaxUnavailable(namespace, 3, 10*time.Minute) finishMinReadyE2ERollout(namespace, rollout.Name)