diff --git a/.github/workflows/e2e-deployment-minready-1.24.yaml b/.github/workflows/e2e-deployment-minready-1.24.yaml new file mode 100644 index 00000000..a1b746f7 --- /dev/null +++ b/.github/workflows/e2e-deployment-minready-1.24.yaml @@ -0,0 +1,113 @@ +name: E2E-Deployment-MinReady-1.24 + +on: + push: + branches: + - master + - release-* + pull_request: {} + workflow_dispatch: {} + +# Declare default permissions as read only. +permissions: read-all + +env: + # Common versions + GO_VERSION: '1.20' + KIND_CLUSTER_NAME: 'ci-testing' + KIND_VERSION: 'v0.14.0' + KIND_IMAGE: 'kindest/node:v1.24.6' + +jobs: + + rollout: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + submodules: true + - name: Setup Go + uses: actions/setup-go@0aaccfd150d50ccaeb58ebd88d36e91967a5f35b # v5.4.0 + with: + go-version: ${{ env.GO_VERSION }} + - name: Setup Kind Cluster + uses: helm/kind-action@a1b0e391336a6ee6713a0583f8c6240d70863de3 # v1.12.0 + with: + node_image: ${{ env.KIND_IMAGE }} + cluster_name: ${{ env.KIND_CLUSTER_NAME }} + config: ./test/kind-conf.yaml + version: ${{ env.KIND_VERSION }} + - name: Build image + run: | + export IMAGE="openkruise/kruise-rollout:e2e-${GITHUB_RUN_ID}" + docker build --pull --no-cache . 
-t $IMAGE + kind load docker-image --name=${KIND_CLUSTER_NAME} $IMAGE || { echo >&2 "kind not installed or error loading image: $IMAGE"; exit 1; } + - name: Install Kruise + run: | + set -ex + kubectl cluster-info + make helm + helm repo add openkruise https://openkruise.github.io/charts/ + helm repo update + helm install kruise openkruise/kruise --version 1.7.0 + for ((i=1;i<10;i++)); + do + set +e + PODS=$(kubectl get pod -n kruise-system | grep '1/1' | grep kruise-controller-manager | wc -l) + set -e + if [ "$PODS" -eq "2" ]; then + break + fi + sleep 3 + done + set +e + PODS=$(kubectl get pod -n kruise-system | grep '1/1' | grep kruise-controller-manager | wc -l) + set -e + if [ "$PODS" -eq "2" ]; then + echo "Wait for kruise-manager ready successfully" + else + echo "Timeout to wait for kruise-manager ready" + exit 1 + fi + - name: Install Kruise Rollout + run: | + set -ex + kubectl cluster-info + IMG=openkruise/kruise-rollout:e2e-${GITHUB_RUN_ID} ./scripts/deploy_kind.sh + # Enable MinReadySecondsStrategy feature gate. 
+ kubectl patch deployment kruise-rollout-controller-manager -n kruise-rollout --type='json' \ + -p='[{"op": "replace", "path": "/spec/template/spec/containers/0/args/1", "value": "--feature-gates=AdvancedDeployment=true,MinReadySecondsStrategy=true"}]' + kubectl rollout status deployment/kruise-rollout-controller-manager -n kruise-rollout --timeout=120s + for ((i=1;i<10;i++)); + do + set +e + PODS=$(kubectl get pod -n kruise-rollout | grep '1/1' | wc -l) + set -e + if [ "$PODS" -eq "1" ]; then + break + fi + sleep 3 + done + set +e + PODS=$(kubectl get pod -n kruise-rollout | grep '1/1' | wc -l) + kubectl get node -o yaml + kubectl get all -n kruise-rollout -o yaml + set -e + if [ "$PODS" -eq "1" ]; then + echo "Wait for kruise-rollout ready successfully" + else + echo "Timeout to wait for kruise-rollout ready" + exit 1 + fi + - name: Run E2E Tests For Deployment MinReadySeconds + run: | + export KUBECONFIG=/home/runner/.kube/config + make ginkgo + set +e + ./bin/ginkgo -timeout 60m -v test/e2e/minready + retVal=$? + if [ "${retVal}" -ne 0 ];then + echo "test fail, dump kruise-rollout logs" + kubectl get pod -n kruise-rollout --no-headers | grep manager | awk '{print $1}' | xargs kubectl logs -n kruise-rollout + fi + exit $retVal diff --git a/.github/workflows/e2e-deployment-minready-1.26.yaml b/.github/workflows/e2e-deployment-minready-1.26.yaml new file mode 100644 index 00000000..cb3e8e6d --- /dev/null +++ b/.github/workflows/e2e-deployment-minready-1.26.yaml @@ -0,0 +1,113 @@ +name: E2E-Deployment-MinReady-1.26 + +on: + push: + branches: + - master + - release-* + pull_request: {} + workflow_dispatch: {} + +# Declare default permissions as read only. 
+permissions: read-all + +env: + # Common versions + GO_VERSION: '1.20' + KIND_VERSION: 'v0.18.0' + KIND_IMAGE: 'kindest/node:v1.26.3' + KIND_CLUSTER_NAME: 'ci-testing' + +jobs: + + rollout: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + submodules: true + - name: Setup Go + uses: actions/setup-go@0aaccfd150d50ccaeb58ebd88d36e91967a5f35b # v5.4.0 + with: + go-version: ${{ env.GO_VERSION }} + - name: Setup Kind Cluster + uses: helm/kind-action@a1b0e391336a6ee6713a0583f8c6240d70863de3 # v1.12.0 + with: + node_image: ${{ env.KIND_IMAGE }} + cluster_name: ${{ env.KIND_CLUSTER_NAME }} + config: ./test/kind-conf.yaml + version: ${{ env.KIND_VERSION }} + - name: Build image + run: | + export IMAGE="openkruise/kruise-rollout:e2e-${GITHUB_RUN_ID}" + docker build --pull --no-cache . -t $IMAGE + kind load docker-image --name=${KIND_CLUSTER_NAME} $IMAGE || { echo >&2 "kind not installed or error loading image: $IMAGE"; exit 1; } + - name: Install Kruise + run: | + set -ex + kubectl cluster-info + make helm + helm repo add openkruise https://openkruise.github.io/charts/ + helm repo update + helm install kruise openkruise/kruise --version 1.7.0 + for ((i=1;i<10;i++)); + do + set +e + PODS=$(kubectl get pod -n kruise-system | grep '1/1' | grep kruise-controller-manager | wc -l) + set -e + if [ "$PODS" -eq "2" ]; then + break + fi + sleep 3 + done + set +e + PODS=$(kubectl get pod -n kruise-system | grep '1/1' | grep kruise-controller-manager | wc -l) + set -e + if [ "$PODS" -eq "2" ]; then + echo "Wait for kruise-manager ready successfully" + else + echo "Timeout to wait for kruise-manager ready" + exit 1 + fi + - name: Install Kruise Rollout + run: | + set -ex + kubectl cluster-info + IMG=openkruise/kruise-rollout:e2e-${GITHUB_RUN_ID} ./scripts/deploy_kind.sh + # Enable MinReadySecondsStrategy feature gate. 
+ kubectl patch deployment kruise-rollout-controller-manager -n kruise-rollout --type='json' \ + -p='[{"op": "replace", "path": "/spec/template/spec/containers/0/args/1", "value": "--feature-gates=AdvancedDeployment=true,MinReadySecondsStrategy=true"}]' + kubectl rollout status deployment/kruise-rollout-controller-manager -n kruise-rollout --timeout=120s + for ((i=1;i<10;i++)); + do + set +e + PODS=$(kubectl get pod -n kruise-rollout | grep '1/1' | wc -l) + set -e + if [ "$PODS" -eq "1" ]; then + break + fi + sleep 3 + done + set +e + PODS=$(kubectl get pod -n kruise-rollout | grep '1/1' | wc -l) + kubectl get node -o yaml + kubectl get all -n kruise-rollout -o yaml + set -e + if [ "$PODS" -eq "1" ]; then + echo "Wait for kruise-rollout ready successfully" + else + echo "Timeout to wait for kruise-rollout ready" + exit 1 + fi + - name: Run E2E Tests For Deployment MinReadySeconds + run: | + export KUBECONFIG=/home/runner/.kube/config + make ginkgo + set +e + ./bin/ginkgo -timeout 60m -v test/e2e/minready + retVal=$? + if [ "${retVal}" -ne 0 ];then + echo "test fail, dump kruise-rollout logs" + kubectl get pod -n kruise-rollout --no-headers | grep manager | awk '{print $1}' | xargs kubectl logs -n kruise-rollout + fi + exit $retVal diff --git a/.github/workflows/e2e-deployment-minready-1.28.yaml b/.github/workflows/e2e-deployment-minready-1.28.yaml new file mode 100644 index 00000000..33ffca81 --- /dev/null +++ b/.github/workflows/e2e-deployment-minready-1.28.yaml @@ -0,0 +1,113 @@ +name: E2E-Deployment-MinReady-1.28 + +on: + push: + branches: + - master + - release-* + pull_request: {} + workflow_dispatch: {} + +# Declare default permissions as read only. 
+permissions: read-all + +env: + # Common versions + GO_VERSION: '1.20' + KIND_VERSION: 'v0.22.0' + KIND_IMAGE: 'kindest/node:v1.28.7' + KIND_CLUSTER_NAME: 'ci-testing' + +jobs: + + rollout: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + submodules: true + - name: Setup Go + uses: actions/setup-go@0aaccfd150d50ccaeb58ebd88d36e91967a5f35b # v5.4.0 + with: + go-version: ${{ env.GO_VERSION }} + - name: Setup Kind Cluster + uses: helm/kind-action@a1b0e391336a6ee6713a0583f8c6240d70863de3 # v1.12.0 + with: + node_image: ${{ env.KIND_IMAGE }} + cluster_name: ${{ env.KIND_CLUSTER_NAME }} + config: ./test/kind-conf.yaml + version: ${{ env.KIND_VERSION }} + - name: Build image + run: | + export IMAGE="openkruise/kruise-rollout:e2e-${GITHUB_RUN_ID}" + docker build --pull --no-cache . -t $IMAGE + kind load docker-image --name=${KIND_CLUSTER_NAME} $IMAGE || { echo >&2 "kind not installed or error loading image: $IMAGE"; exit 1; } + - name: Install Kruise + run: | + set -ex + kubectl cluster-info + make helm + helm repo add openkruise https://openkruise.github.io/charts/ + helm repo update + helm install kruise openkruise/kruise --version 1.7.0 + for ((i=1;i<10;i++)); + do + set +e + PODS=$(kubectl get pod -n kruise-system | grep '1/1' | grep kruise-controller-manager | wc -l) + set -e + if [ "$PODS" -eq "2" ]; then + break + fi + sleep 3 + done + set +e + PODS=$(kubectl get pod -n kruise-system | grep '1/1' | grep kruise-controller-manager | wc -l) + set -e + if [ "$PODS" -eq "2" ]; then + echo "Wait for kruise-manager ready successfully" + else + echo "Timeout to wait for kruise-manager ready" + exit 1 + fi + - name: Install Kruise Rollout + run: | + set -ex + kubectl cluster-info + IMG=openkruise/kruise-rollout:e2e-${GITHUB_RUN_ID} ./scripts/deploy_kind.sh + # Enable MinReadySecondsStrategy feature gate. 
+ kubectl patch deployment kruise-rollout-controller-manager -n kruise-rollout --type='json' \ + -p='[{"op": "replace", "path": "/spec/template/spec/containers/0/args/1", "value": "--feature-gates=AdvancedDeployment=true,MinReadySecondsStrategy=true"}]' + kubectl rollout status deployment/kruise-rollout-controller-manager -n kruise-rollout --timeout=120s + for ((i=1;i<10;i++)); + do + set +e + PODS=$(kubectl get pod -n kruise-rollout | grep '1/1' | wc -l) + set -e + if [ "$PODS" -eq "1" ]; then + break + fi + sleep 3 + done + set +e + PODS=$(kubectl get pod -n kruise-rollout | grep '1/1' | wc -l) + kubectl get node -o yaml + kubectl get all -n kruise-rollout -o yaml + set -e + if [ "$PODS" -eq "1" ]; then + echo "Wait for kruise-rollout ready successfully" + else + echo "Timeout to wait for kruise-rollout ready" + exit 1 + fi + - name: Run E2E Tests For Deployment MinReadySeconds + run: | + export KUBECONFIG=/home/runner/.kube/config + make ginkgo + set +e + ./bin/ginkgo -timeout 60m -v test/e2e/minready + retVal=$? + if [ "${retVal}" -ne 0 ];then + echo "test fail, dump kruise-rollout logs" + kubectl get pod -n kruise-rollout --no-headers | grep manager | awk '{print $1}' | xargs kubectl logs -n kruise-rollout + fi + exit $retVal diff --git a/CHANGELOG.md b/CHANGELOG.md index 684878e7..80ab88f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Change Log +## Unreleased +### Key Features: +- Added the alpha Deployment `MinReadySeconds` rollout strategy, including API fields, feature gate, controller routing, observability, documentation, and tests. + ## v0.6.2 ### Bugfix: - Fixed issue where partition deployments got stuck. 
([#307](https://github.com/openkruise/rollouts/pull/307),[@AiRanthem](https://github.com/AiRanthem)) diff --git a/Dockerfile_multiarch b/Dockerfile_multiarch index f60efafb..dc58638b 100644 --- a/Dockerfile_multiarch +++ b/Dockerfile_multiarch @@ -1,7 +1,7 @@ # Build the manager binary ARG BASE_IMAGE=alpine -ARG BASE_IMAGE_VERION=3.17 -FROM --platform=$BUILDPLATFORM golang:1.19-alpine3.17 AS builder +ARG BASE_IMAGE_VERION=3.19 +FROM --platform=$BUILDPLATFORM golang:1.20.14-alpine3.19 AS builder WORKDIR /workspace diff --git a/api/v1alpha1/deployment_types.go b/api/v1alpha1/deployment_types.go index 98fc474c..d7a39786 100644 --- a/api/v1alpha1/deployment_types.go +++ b/api/v1alpha1/deployment_types.go @@ -90,7 +90,7 @@ func SetDefaultDeploymentStrategy(strategy *DeploymentStrategy) { if strategy.RollingUpdate.MaxSurge == nil { // Set MaxSurge as 25% by default maxSurge := intstr.FromString("25%") - strategy.RollingUpdate.MaxUnavailable = &maxSurge + strategy.RollingUpdate.MaxSurge = &maxSurge } // Cannot allow maxSurge==0 && MaxUnavailable==0, otherwise, no pod can be updated when rolling update. diff --git a/api/v1alpha1/deployment_types_test.go b/api/v1alpha1/deployment_types_test.go new file mode 100644 index 00000000..8476ede7 --- /dev/null +++ b/api/v1alpha1/deployment_types_test.go @@ -0,0 +1,35 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package v1alpha1 + +import "testing" + +func TestSetDefaultDeploymentStrategyDefaultsMaxSurge(t *testing.T) { + strategy := &DeploymentStrategy{RollingStyle: PartitionRollingStyle} + + SetDefaultDeploymentStrategy(strategy) + + if strategy.RollingUpdate == nil { + t.Fatalf("rollingUpdate = nil, want defaulted") + } + if strategy.RollingUpdate.MaxSurge == nil || strategy.RollingUpdate.MaxSurge.StrVal != "25%" { + t.Fatalf("maxSurge = %v, want 25%%", strategy.RollingUpdate.MaxSurge) + } + if strategy.RollingUpdate.MaxUnavailable == nil || strategy.RollingUpdate.MaxUnavailable.StrVal != "25%" { + t.Fatalf("maxUnavailable = %v, want 25%%", strategy.RollingUpdate.MaxUnavailable) + } +} diff --git a/api/v1alpha1/rollout_types.go b/api/v1alpha1/rollout_types.go index 526c110e..f30ba7d4 100644 --- a/api/v1alpha1/rollout_types.go +++ b/api/v1alpha1/rollout_types.go @@ -219,6 +219,15 @@ const ( // Terminating Reason TerminatingReasonInTerminating = "InTerminating" TerminatingReasonCompleted = "Completed" + + // RolloutConditionMinReadyInitialized indicates MinReadySeconds strategy initialization has completed. + RolloutConditionMinReadyInitialized RolloutConditionType = "MinReadyInitialized" + // RolloutConditionMinReadyBatching indicates MinReadySeconds strategy batch processing is active. + RolloutConditionMinReadyBatching RolloutConditionType = "MinReadyBatching" + // RolloutConditionMinReadyDegraded indicates MinReadySeconds strategy hit an explicit blocking error. + RolloutConditionMinReadyDegraded RolloutConditionType = "MinReadyDegraded" + // RolloutConditionMinReadyFinalized indicates MinReadySeconds strategy finalization has completed. 
+ RolloutConditionMinReadyFinalized RolloutConditionType = "MinReadyFinalized" ) // CanaryStatus status fields that only pertain to the canary rollout diff --git a/api/v1beta1/deployment_types.go b/api/v1beta1/deployment_types.go index 5002fd82..338beeeb 100644 --- a/api/v1beta1/deployment_types.go +++ b/api/v1beta1/deployment_types.go @@ -47,8 +47,42 @@ const ( // MaxInt32: 2147483647, ≈ 68 years MaxProgressSeconds = 1<<31 - 1 MaxReadySeconds = MaxProgressSeconds - 1 + + // MinReady default values mirror Kubernetes Deployment defaults for fields + // snapshotted before the MinReadySeconds strategy inflates them. + MinReadyDefaultProgressDeadlineSeconds int32 = 600 + MinReadyDefaultMaxUnavailable = "25%" + + // MinReadyOriginal*Annotation snapshot the user-specified Deployment strategy + // fields before the MinReadySeconds strategy inflates them; they are used to + // restore the Deployment on finalize. A Deployment carrying any of them is + // (still) managed by the MinReady controller, even if the feature gate has + // been turned off mid-rollout. + MinReadyOriginalMinReadySecondsAnnotation = "rollouts.kruise.io/original-min-ready-seconds" + MinReadyOriginalProgressDeadlineSecondsAnnotation = "rollouts.kruise.io/original-progress-deadline-seconds" + MinReadyOriginalMaxUnavailableAnnotation = "rollouts.kruise.io/original-max-unavailable" ) +// MinReadyOriginalAnnotations lists all annotations that snapshot the original +// Deployment strategy fields for the MinReadySeconds strategy. +var MinReadyOriginalAnnotations = []string{ + MinReadyOriginalMinReadySecondsAnnotation, + MinReadyOriginalProgressDeadlineSecondsAnnotation, + MinReadyOriginalMaxUnavailableAnnotation, +} + +// HasMinReadyOriginalAnnotations returns true if the annotations carry any +// MinReady original-strategy snapshot, i.e. the workload was initialized by +// the MinReady controller and has not been finalized yet. 
+func HasMinReadyOriginalAnnotations(annotations map[string]string) bool { + for _, key := range MinReadyOriginalAnnotations { + if _, ok := annotations[key]; ok { + return true + } + } + return false +} + // DeploymentStrategy is strategy field for Advanced Deployment type DeploymentStrategy struct { // RollingStyle define the behavior of rolling for deployment. @@ -100,7 +134,7 @@ func SetDefaultDeploymentStrategy(strategy *DeploymentStrategy) { if strategy.RollingUpdate.MaxSurge == nil { // Set MaxSurge as 25% by default maxSurge := intstr.FromString("25%") - strategy.RollingUpdate.MaxUnavailable = &maxSurge + strategy.RollingUpdate.MaxSurge = &maxSurge } // Cannot allow maxSurge==0 && MaxUnavailable==0, otherwise, no pod can be updated when rolling update. diff --git a/api/v1beta1/deployment_types_test.go b/api/v1beta1/deployment_types_test.go new file mode 100644 index 00000000..ed79a6d5 --- /dev/null +++ b/api/v1beta1/deployment_types_test.go @@ -0,0 +1,35 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package v1beta1 + +import "testing" + +func TestSetDefaultDeploymentStrategyDefaultsMaxSurge(t *testing.T) { + strategy := &DeploymentStrategy{RollingStyle: PartitionRollingStyle} + + SetDefaultDeploymentStrategy(strategy) + + if strategy.RollingUpdate == nil { + t.Fatalf("rollingUpdate = nil, want defaulted") + } + if strategy.RollingUpdate.MaxSurge == nil || strategy.RollingUpdate.MaxSurge.StrVal != "25%" { + t.Fatalf("maxSurge = %v, want 25%%", strategy.RollingUpdate.MaxSurge) + } + if strategy.RollingUpdate.MaxUnavailable == nil || strategy.RollingUpdate.MaxUnavailable.StrVal != "25%" { + t.Fatalf("maxUnavailable = %v, want 25%%", strategy.RollingUpdate.MaxUnavailable) + } +} diff --git a/api/v1beta1/rollout_types.go b/api/v1beta1/rollout_types.go index 06b108e6..50ba2a40 100644 --- a/api/v1beta1/rollout_types.go +++ b/api/v1beta1/rollout_types.go @@ -384,6 +384,15 @@ const ( TerminatingReasonInTerminating = "InTerminating" TerminatingReasonCompleted = "Completed" + // RolloutConditionMinReadyInitialized indicates MinReadySeconds strategy initialization has completed. + RolloutConditionMinReadyInitialized RolloutConditionType = "MinReadyInitialized" + // RolloutConditionMinReadyBatching indicates MinReadySeconds strategy batch processing is active. + RolloutConditionMinReadyBatching RolloutConditionType = "MinReadyBatching" + // RolloutConditionMinReadyDegraded indicates MinReadySeconds strategy hit an explicit blocking error. + RolloutConditionMinReadyDegraded RolloutConditionType = "MinReadyDegraded" + // RolloutConditionMinReadyFinalized indicates MinReadySeconds strategy finalization has completed. 
+ RolloutConditionMinReadyFinalized RolloutConditionType = "MinReadyFinalized" + // Finalise Reason // Finalise when the last batch is released and all pods will update to new version FinaliseReasonSuccess = "Success" diff --git a/docs/proposals/20260315-deployment-minready-seconds-progressive-delivery.md b/docs/proposals/20260315-deployment-minready-seconds-progressive-delivery.md index 1e37afa2..aabf4873 100644 --- a/docs/proposals/20260315-deployment-minready-seconds-progressive-delivery.md +++ b/docs/proposals/20260315-deployment-minready-seconds-progressive-delivery.md @@ -7,7 +7,7 @@ reviewers: - "@AiRanthem" - "@zmberg" creation-date: 2026-05-23 -last-updated: 2026-06-02 +last-updated: 2026-06-07 status: implementable --- @@ -31,7 +31,7 @@ status: implementable - [API Compatibility](#api-compatibility) - [Annotation Schema](#annotation-schema) - [Field Inflation Values](#field-inflation-values) - - [Optional maxSurge Module](#optional-maxsurge-module) + - [maxSurge Handling](#maxsurge-handling) - [Controller Implementation](#controller-implementation) - [Initialization Process](#initialization-process) - [Batch Upgrade Process](#batch-upgrade-process) @@ -133,7 +133,7 @@ graph TB WH["Workload Update Webhook
feature-gated Recreate skip
preserves RollingUpdate"] EX["BatchRelease Executor
feature-gated controller selection"] MRC["MinReadyControl
(embeds *realController)
Initialize / UpgradeBatch /
CalculateBatchContext / Finalize"] - MS["Optional maxSurge Module
preserve, limit, or disable
behind internal switch"] + MS["maxSurge Handling
left to native Deployment"] end subgraph "Kubernetes Native Control Plane" @@ -154,7 +154,7 @@ graph TB EX -->|"6. Route to MinReadyControl"| MRC MRC -->|"7. Initialize:
save original fields
inflate minReadySeconds
set maxUnavailable=0"| API - MRC -.->|"optional surge behavior"| MS + MRC -.->|"does not store or mutate maxSurge"| MS API -->|"persists fields + annotations"| DEP MRC -->|"8. UpgradeBatch:
increase maxUnavailable
by batch size"| API @@ -215,30 +215,29 @@ No `CanaryStrategy.DeploymentStrategy`, `ReleasePlan.DeploymentStrategy`, conver #### Annotation Schema -During rollout, the original values of four Deployment fields are persisted in annotations on the Deployment object itself. This makes the rollout state recoverable across controller restarts without any in-memory state. +During rollout, the original values of three Deployment fields are persisted in annotations on the Deployment object itself. This makes the rollout state recoverable across controller restarts without any in-memory state. ``` rollouts.kruise.io/original-min-ready-seconds: "" rollouts.kruise.io/original-progress-deadline-seconds: "" rollouts.kruise.io/original-max-unavailable: "" -rollouts.kruise.io/original-max-surge: "" ``` **Invariants**: -- All four annotations are written and deleted in a single `Patch` operation (relying on the Kubernetes API server's resource-level PATCH atomicity). -- All four present = rollout in progress; all four absent = idle state. -- If the user's original field is `nil` (relying on Kubernetes defaults), the sentinel value `__k8s_default__` is written. This preserves the distinction between "user explicitly set this value" and "user relied on the default", which is important during Finalize. +- All three annotations are written and deleted in a single `Patch` operation (relying on the Kubernetes API server's resource-level PATCH atomicity). +- All three present = rollout in progress; all three absent = idle state. +- If the user's original pointer field is `nil` in tests or fake-client paths, the annotation stores the Kubernetes API default value itself (`600` for `progressDeadlineSeconds`, `25%` for `maxUnavailable`) instead of a sentinel string. 
**Serialization rules**: | Source type | Example value | Annotation string | |---|---|---| | `int32` (pointer non-nil) | `int32(10)` | `"10"` | -| `int32` (pointer nil) | — | `"__k8s_default__"` | +| `int32` (pointer nil) | — | `"600"` | | `IntOrString` Type=Int | `{Type: Int, IntVal: 5}` | `"5"` | | `IntOrString` Type=String | `{Type: String, StrVal: "25%"}` | `"25%"` | -| `*IntOrString` pointer nil | — | `"__k8s_default__"` | +| `*IntOrString` pointer nil | — | `"25%"` | #### Field Inflation Values @@ -252,31 +251,17 @@ During `Initialize`, the core MinReadySeconds path inflates three Deployment fie **Why `minReadySeconds` is one less than `progressDeadlineSeconds`**: Kubernetes Deployment validation requires `minReadySeconds < progressDeadlineSeconds`. Setting both to `MaxInt32` would cause the Deployment to fail validation. The existing constant `MaxReadySeconds = MaxProgressSeconds - 1` (defined in `api/v1beta1/deployment_types.go`) is reused. -`maxSurge` is deliberately not part of the core field-inflation contract. It is handled by a separate policy module so maintainers can enable full surge support, use a conservative alpha policy, or temporarily disable the module without changing the MinReadySeconds rollout algorithm. +`maxSurge` is not a new user-facing policy or a MinReadySeconds rollout knob. It is an existing Kubernetes RollingUpdate field, and the MinReadySeconds path preserves the user's original value by default. -#### Optional maxSurge Module +#### maxSurge Handling -Native Deployment RollingUpdate supports surge capacity, and this proposal should not require `maxSurge=1` as a semantic constraint. The implementation isolates surge handling behind an internal policy boundary: +Native Deployment RollingUpdate supports surge capacity, and this proposal does not treat `maxSurge` as a MinReadySeconds rollout knob. 
The implementation follows three internal rules: -```go -type surgePolicy interface { - Initialize(deployment *appsv1.Deployment, original intstr.IntOrString) error - Ensure(deployment *appsv1.Deployment, original intstr.IntOrString) error - Restore(deployment *appsv1.Deployment, original intstr.IntOrString) error -} -``` - -Supported policy choices: - -| Policy | Alpha status | Behavior | -|---|---|---| -| `PreserveSurgePolicy` | Preferred if accepted | Preserve the user's original `maxSurge`. Surge-created updated pods are allowed, but they are counted as batch-complete only after satisfying the original `minReadySeconds`. | -| `ConservativeSurgePolicy` | Fallback | Save and restore the user's original `maxSurge`, but use a small live value during rollout to reduce temporary capacity pressure. This is an implementation fallback, not a user-visible API guarantee. | -| `DisabledSurgePolicy` | Escape hatch | Reject or degrade workloads whose `maxSurge` requires unsupported behavior. This keeps the maxSurge module removable from alpha without touching the core MinReadySeconds controller. | +1. Do not store the original `maxSurge` value in a MinReady annotation. +2. Do not mutate `maxSurge` during `Initialize`, `UpgradeBatch`, or `Finalize`. +3. Let the native Deployment strategy retain whatever `maxSurge` value the user or Kubernetes defaulting already provided. -The batch-ready calculation is the same under all policies: count updated pods only after they are `Ready` and have remained ready for the user's original `minReadySeconds`. Therefore, preserving a larger `maxSurge` can increase temporary pod count, but it cannot mark a batch successful early. - -Any policy must also preserve Kubernetes RollingUpdate validation rules. In particular, the live strategy must not set both `maxUnavailable=0` and `maxSurge=0`. 
If the maxSurge module is disabled for alpha, the controller should reject unsupported surge configurations or keep a minimal valid live surge value rather than writing an invalid Deployment strategy. +The batch-ready calculation is independent from `maxSurge`: count updated pods only after they are `Ready` and have remained ready for the user's original `minReadySeconds`. Therefore, preserving a larger `maxSurge` can increase temporary pod count, but it cannot mark a batch successful early. #### Controller Implementation @@ -288,7 +273,7 @@ type MinReadyControl struct { } func NewMinReadyController(cli client.Client, key types.NamespacedName, gvk schema.GroupVersionKind) partitionstyle.Interface { - return &MinReadyControl{realController: NewController(cli, key, gvk).(*realController)} + return &MinReadyControl{realController: newRealController(cli, key)} } ``` @@ -296,7 +281,7 @@ func NewMinReadyController(cli client.Client, key types.NamespacedName, gvk sche |---|---| | `GetWorkloadInfo` | Inherited (no change). | | `ListOwnedPods` | Inherited (no change). | -| `BuildController` | Inherited (no change). | +| `BuildController` | **Wrapped** — builds the embedded real controller and returns a `MinReadyControl`. | | `Initialize` | **Overridden** — see [Initialization Process](#initialization-process). | | `UpgradeBatch` | **Overridden** — see [Batch Upgrade Process](#batch-upgrade-process). | | `CalculateBatchContext` | **Overridden** — see [Batch Context Calculation](#batch-context-calculation). | @@ -309,13 +294,14 @@ func NewMinReadyController(cli client.Client, key types.NamespacedName, gvk sche 1. **Eligibility check** (`ensureInitializeAllowed`): - The `MinReadySecondsStrategy` feature gate must be enabled. Otherwise return error → `MinReadyDegraded`. - The Deployment must use `RollingUpdate`. `Recreate` workloads continue to use the existing path. - - PDB presence is not a hard rejection. 
PDBs are detected for observability only because they protect Eviction API flows, not Deployment rolling updates. + - PDB presence is not an eligibility failure. PDBs protect Eviction API flows, not Deployment rolling updates, so they are not used as the batch-safety mechanism. 2. **Annotation persistence** (`writeOriginalAnnotations`): - - If any of the four annotations is already present, validate that all four exist (idempotency check) and that the on-disk fields are already inflated. If consistent, no-op. - - Otherwise, serialize the current values of `minReadySeconds`, `progressDeadlineSeconds`, `maxUnavailable`, `maxSurge` per the serialization rules above and write all four annotations. + - If any of the three annotations is already present, validate that all three exist (idempotency check). If the on-disk fields are already inflated, no-op. + - If a continuous release supplies new user-owned `minReadySeconds` / `progressDeadlineSeconds` while annotations already exist, refresh those original annotations before re-inflating. + - Otherwise, serialize the current values of `minReadySeconds`, `progressDeadlineSeconds`, and `maxUnavailable` per the serialization rules above and write all three annotations. -3. **Field inflation** (`inflateDeploymentStrategy`): Set `minReadySeconds`, `progressDeadlineSeconds`, and `maxUnavailable` to their MinReadySeconds values. Apply the configured `maxSurge` policy module if enabled. +3. **Field inflation** (`inflateDeploymentStrategy`): Set `minReadySeconds`, `progressDeadlineSeconds`, and `maxUnavailable` to their MinReadySeconds values. Leave `maxSurge` unchanged. 4. **Atomic commit**: Issue a single `Patch` using `client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{})`. The annotations and field changes are committed together; the Kubernetes API server's resource-level PATCH atomicity guarantees no partial state is observable. 
@@ -323,10 +309,10 @@ func NewMinReadyController(cli client.Client, key types.NamespacedName, gvk sche `UpgradeBatch(ctx)` is invoked per batch by the BatchRelease executor. It performs: -1. **Inflation invariant** (`ensureInflatedDeploymentStrategy`): Verify and, if necessary, patch the Deployment so `minReadySeconds == MaxReadySeconds` and `progressDeadlineSeconds == MaxProgressSeconds` before each batch operation. The active `maxSurge` policy performs its own `Ensure` step. This makes the inflated fields a rollout-long invariant rather than a one-time initialization side effect. +1. **Inflation invariant** (`ensureInflatedDeploymentStrategy`): Verify and, if necessary, patch the Deployment so `minReadySeconds == MaxReadySeconds` and `progressDeadlineSeconds == MaxProgressSeconds` before each batch operation. `maxSurge` is not part of the MinReady invariant. This makes the inflated fields a rollout-long invariant rather than a one-time initialization side effect. 2. **Target computation**: Read the current `maxUnavailable` and compare against `ctx.DesiredUpdatedReplicas`. - - If `current > target`: external write has increased `maxUnavailable` beyond the batch target → `MinReadyDegraded`. + - If `current > target`: external write or scale-down has left `maxUnavailable` above the batch target. This is legal and self-heals by reducing it to the target. - If `current == target`: already at target, no-op. 3. **Patch `maxUnavailable = target`**: A single-field Patch using the same optimistic-lock mechanism. The native RollingUpdate controller observes the change and creates new pods accordingly. Because `minReadySeconds` is inflated, the new pods enter `Ready-but-not-Available` from the Deployment controller's perspective. @@ -362,23 +348,23 @@ The controller must use the original `minReadySeconds` saved in the Deployment a `Finalize` restores the Deployment to its pre-rollout state. It performs: 1. If the Deployment object is `nil` (deleted), no-op. -2.
If none of the four annotations is present, the Deployment is already in idle state — no-op. +2. If none of the three annotations is present, the Deployment is already in idle state — no-op. 3. **Parse annotations** (`parseOriginalDeploymentStrategy`): | Annotation state | Parse result | Behavior | |---|---|---| -| All four present and parseable | Restored field values (with `nil` indicating "user relied on default") | Normal Finalize. | +| All three present and parseable | Restored field values | Normal Finalize. | | Any one fails to parse (corrupt format) | Error | `MinReadyDegraded`. | | Partial annotations missing | Error | `MinReadyDegraded`. | -| All four missing | — | No-op (already idle). | +| All three missing | — | No-op (already idle). | 4. **Field restoration** (`applyOriginalDeploymentStrategy`): - - `minReadySeconds`: `nil` → set to `0` (Kubernetes default); non-nil → restore original. - - `progressDeadlineSeconds`: `nil` → clear pointer (Kubernetes default `600`s applies); non-nil → restore original. - - If both `maxUnavailable` and `maxSurge` are `nil`, clear the entire `RollingUpdate` block (Kubernetes default applies). - - Otherwise, restore each field individually. + - `minReadySeconds`: restore the parsed integer value. + - `progressDeadlineSeconds`: restore the parsed integer value. + - `maxUnavailable`: restore the parsed int-or-percent value. + - `maxSurge`: not restored from MinReady annotations and not mutated by MinReady finalization. -5. Delete all four annotations and `Patch` atomically. +5. Delete all three annotations and `Patch` atomically. **Why Degraded refuses to silently fall back to Kubernetes defaults**: A user whose original `maxUnavailable` was `50%` and whose annotations were corrupted should not be silently downgraded to the Kubernetes default `25%`. The release-rate change is operationally significant and should be surfaced for human review, not masked. @@ -404,7 +390,7 @@ The controller maintains **no in-memory state**. 
After a controller restart or l | Before Initialize | No annotations | Re-run `Initialize` (idempotent). | | After Initialize | Three annotations present + fields inflated | Skip `Initialize`, proceed to `UpgradeBatch`. | | Mid-UpgradeBatch, `maxUnavailable` already at target | Annotations present, `maxUnavailable >= target` | Proceed to batch-ready check. | -| Mid-UpgradeBatch, `maxUnavailable` not yet at target | Annotations present, `maxUnavailable < target` | Re-issue the `UpgradeBatch` Patch. | +| Mid-UpgradeBatch, `maxUnavailable` not yet at target | Annotations present, `maxUnavailable < target` | Continue the MinReady maxUnavailable window reconcile on the next `UpgradeBatch` / `EnsureBatchPodsReadyAndLabeled` pass. | | Mid-Finalize, fields restored but annotations remain | Annotations present, fields not inflated | Re-issue the `Finalize` (deletes annotations). | | After Finalize | No annotations | No-op. | @@ -415,40 +401,79 @@ The state determination is always based on **observable Deployment state**, neve A feature-gated guard is added to `pkg/webhook/workload/mutating/workload_update_handler.go`: ```go -func shouldSkipRecreateMutationForMinReady(rollout *appsv1beta1.Rollout) bool { - return rollout.Spec.Strategy.Canary != nil && - !rollout.Spec.Strategy.Canary.EnableExtraWorkloadForCanary && - utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) +func isMinReadySecondsStrategy(rollout *appsv1beta1.Rollout, deployment *apps.Deployment) bool { + if rollout.Spec.Strategy.Canary == nil || + rollout.Spec.Strategy.Canary.EnableExtraWorkloadForCanary { + return false + } + if utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) { + return true + } + strategy := util.GetDeploymentStrategy(deployment) + return strings.EqualFold(string(strategy.RollingStyle), string(appsv1alpha1.PartitionRollingStyle)) } ``` -The guard is invoked at the top of the existing Recreate mutation logic: +The guard splits the mutation into two
paths. `isMinReadySecondsStrategy` only checks `Canary` because a Rollout cannot declare both `BlueGreen` and `Canary` — the validating webhook rejects that combination. When the feature gate is disabled mid-rollout, the Deployment's `DeploymentStrategyAnnotation` keeps the webhook symmetric with the executor's MinReady annotation fallback. + +**Enrollment path (workload entering progressing).** Instead of pausing the Deployment, the webhook synchronously snapshots the original strategy fields into annotations and inflates `minReadySeconds` / `progressDeadlineSeconds` / `maxUnavailable` in place via the mutating package's local `enrollMinReadyDeployment` helper. This keeps admission code independent from partition-style controller internals: ```go -if shouldSkipRecreateMutationForMinReady(rollout) { - return false, nil // do not mutate; preserve the user's original strategy +if isMinReadySecondsStrategy(rollout, deployment) { + // MinReady keeps the native controller running, so it must NOT be paused. + // Inflate synchronously at admission time so the native controller never + // observes the user's original budget in the window between admission and + // MinReadyControl.Initialize. Continuous releases refresh user-owned + // availability annotations before re-inflation. + if err := enrollMinReadyDeployment(newObj); err != nil { + klog.Warningf("Skip MinReady enrollment for Deployment(%s/%s): %v", ...) + } +} else { + newObj.Spec.Paused = true // Partition/Recreate style disables the native controller } -// ... existing Recreate mutation logic unchanged ... ``` -When the feature gate is disabled, the existing behavior is preserved exactly. +Enrolling at admission time closes the race window that would otherwise exist between "new revision admitted" and "`Initialize` patch lands", during which the native controller could replace pods using the user's original budget before batch 0 takes effect. Enrollment does not block admission: an unsupported strategy (e.g. 
`Recreate`) only logs a warning, and `MinReadyControl.Initialize` surfaces a degraded condition instead. + +**Progressing path (re-admission of an active rollout).** For a Deployment already in progressing state, `enforceMinReadyInflation` re-asserts the full set of invariants the strategy depends on, rewriting unsafe external edits back to safe values before they reach storage: -The webhook also enforces the inflation invariant for active MinReadySeconds rollouts. If an external writer lowers or clears `minReadySeconds` or `progressDeadlineSeconds` while the Deployment still carries the original-value annotations, the webhook rewrites the update back to `MaxReadySeconds` and `MaxProgressSeconds`. This complements the reconcile-time `ensureInflatedDeploymentStrategy` check and prevents a short window where the native Deployment controller could observe restored values before the rollout has finalized. +- `spec.strategy.type` forced back to `RollingUpdate` (a `Recreate` write is rejected); +- `spec.paused` forced back to `false` (a paused Deployment would silently freeze the native controller); +- `spec.strategy.rollingUpdate` ensured non-nil; +- `minReadySeconds` / `progressDeadlineSeconds` re-inflated if lowered or cleared. + +This complements the reconcile-time `ensureInflatedDeploymentStrategy` check: the webhook blocks dangerous spec at admission, while the controller self-heals any drift that slips through (e.g. a direct etcd write or a GitOps reconcile between admissions). 
#### Strategy Selection -The BatchRelease executor routes to the MinReadySeconds controller based on the feature gate and the existing rollout shape: +The BatchRelease executor routes to the MinReadySeconds controller when the +feature gate is enabled, **or** when the target Deployment still carries the +MinReady original-value annotations: ```go -if utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) && - isNativeDeployment(release) && - isPartitionStyleCanary(release) { +if utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) || + r.deploymentHasMinReadyAnnotations(targetKey) { return partitionstyle.NewControlPlane(partitiondeployment.NewMinReadyController, ...) } return partitionstyle.NewControlPlane(partitiondeployment.NewController, ...) ``` -No strategy value is copied through `ReleasePlan`; disabled feature gates and unsupported workload shapes fall through to the existing `NewController`, preserving the current Recreate-mode behavior. +The annotation clause is the important one for **gate lifecycle safety**. A +Deployment that was already enrolled (RollingUpdate strategy, `paused=false`, +inflated fields, three original annotations) is **not** recognized as under +control by the legacy Recreate-mode controller, whose ownership check requires +`strategy.type=Recreate && paused=true`. If the gate were turned off mid-rollout +and routing fell back to the legacy controller, the workload would be stranded: +`UpgradeBatch` skipped, `Finalize` a no-op, inflated fields and annotations left +behind. Keeping MinReady control whenever the annotations are present lets an +in-flight rollout finalize cleanly and restore the user's original strategy even +after the gate is disabled. The `isMinReadyRelease` status helper is widened the +same way, so degraded conditions are not silently suppressed once the gate flips +off.
+ +No strategy value is copied through `ReleasePlan`; a disabled gate with no +MinReady annotations, and unsupported workload shapes, fall through to the +existing `NewController`, preserving the current Recreate-mode behavior. ### Risks and Mitigations @@ -460,7 +485,7 @@ Therefore, a PDB may coexist with this rollout, but it cannot enforce the MinRea **Mitigation**: - Do not reject a Deployment rollout only because a PDB selector covers its pods. -- Emit an informational event when a matching PDB exists so operators understand that eviction budget and rollout batch readiness are separate controls. +- Document PDB coexistence clearly so operators treat eviction budget and rollout batch readiness as separate controls. - Calculate batch readiness inside `MinReadyControl` using updated revision plus original `minReadySeconds`; never rely on PDB status as the rollout readiness signal. --- @@ -494,11 +519,59 @@ The worst-case failure mode of this strategy. - **This is the intended worst case** and the core safety guarantee. Unlike Recreate mode, which can result in a service outage from en-masse pod recreation, this strategy degrades gracefully: pods continue serving traffic in their current state. - A `MinReadyDegraded` condition is set, allowing observability systems to alert on the condition. The user can manually run `kubectl patch` to restore fields once the underlying issue is resolved. +### Operator Runbook (alpha) + +This section is the operational contract for the alpha feature gate. It captures the failure modes an operator must understand before enabling `MinReadySecondsStrategy` in a cluster. + +#### Feature gate lifecycle + +The `MinReadySecondsStrategy` gate is **cluster-scoped** (it lives on the kruise-rollout controller, not on individual Rollout resources). Enabling it changes the control mode for **every** native-Deployment partition-style rollout in the cluster, not a selected subset. Per-rollout opt-in is deferred to beta. 
+ +- **Enabling**: turn the gate on before starting a rollout. Newly progressing Deployments are enrolled (strategy inflated) at admission time. +- **Disabling — preconditions**: a Deployment that is mid-rollout under MinReady control carries the three `rollouts.kruise.io/original-*` annotations and has inflated `minReadySeconds`/`progressDeadlineSeconds`. The old Recreate-mode controller does **not** recognize such a Deployment as under its control (it keys on `strategy.type=Recreate && paused=true`). To avoid stranding a workload in a half-initialized inflated state, **finish or cancel all in-flight MinReady rollouts before disabling the gate.** +- **Disabling — safety net**: if the gate is turned off mid-rollout anyway, the executor still routes a Deployment that carries the MinReady original annotations to the MinReady controller (it does not look only at the gate). This lets the rollout finalize and restore the original fields. Once finalized (annotations removed), routing falls back to the default controller. Verify cleanup with `kubectl get deploy -o jsonpath='{.metadata.annotations}'` — no `rollouts.kruise.io/original-*` keys should remain. + +#### `progressDeadlineSeconds` inflation disables the native stuck-detector + +To stop the native Deployment controller from declaring `ProgressDeadlineExceeded` while a batch intentionally waits, `progressDeadlineSeconds` is inflated to `MaxProgressSeconds` (≈68 years). This is deliberate, but it means the **native progress-deadline safety net is off** for the duration of the rollout. The rollout's own stuck-time gauge (`MinReadyStuckSeconds`) replaces it. Operators must alert on that gauge / on the `MinReadyDegraded` condition rather than expecting the native controller to surface a stuck rollout.
+ +#### Worst case: controller dies while a workload is frozen + +If the kruise-rollout controller becomes **permanently unavailable** (crash-loop, deleted deployment, broken leader election) while a Deployment is parked mid-batch, the workload freezes silently: + +- New pods stay `Ready-but-not-Available` because `minReadySeconds` is inflated; the batch never advances. +- The native progress-deadline net is disabled (see above), so the native controller will not report the freeze either. +- The only symptom is the `MinReadyStuckSeconds` gauge climbing — which requires the controller (or an external watchdog) to be alive to emit it. + +This is an accepted alpha limitation. Manual recovery without the controller: + +1. Identify the Deployment and read its saved originals: + ```bash + kubectl get deploy <name> -n <namespace> \ + -o jsonpath='{.metadata.annotations.rollouts\.kruise\.io/original-min-ready-seconds}{"\n"}{.metadata.annotations.rollouts\.kruise\.io/original-progress-deadline-seconds}{"\n"}{.metadata.annotations.rollouts\.kruise\.io/original-max-unavailable}{"\n"}' + ``` + If a field was unset originally, the annotation contains the Kubernetes API default value (`600` for `progressDeadlineSeconds`, `25%` for `maxUnavailable`). +2. Restore the original strategy fields and clear the rollout control annotation: + ```bash + kubectl patch deploy <name> -n <namespace> --type merge -p '{ + "spec": {"minReadySeconds": <original-min-ready-seconds>, "progressDeadlineSeconds": <original-progress-deadline-seconds>, + "strategy": {"rollingUpdate": {"maxUnavailable": <original-max-unavailable>}}}, + "metadata": {"annotations": { + "rollouts.kruise.io/original-min-ready-seconds": null, + "rollouts.kruise.io/original-progress-deadline-seconds": null, + "rollouts.kruise.io/original-max-unavailable": null, + "rollouts.kruise.io/batch-release-control": null }}}' + ``` + The native Deployment controller then resumes a normal rolling update to completion. +3. Before re-enabling the gate, confirm no Deployment retains `rollouts.kruise.io/original-*` annotations. + +A production-grade watchdog that alerts on "controller liveness lost while a MinReady rollout is in flight" is tracked as beta Future Work. + ## Alternatives 1.
**Continue mutating `spec.strategy.type` to `Recreate`** (current implementation): Rejected because the destructive nature of strategy mutation cannot be atomically reversed under failure. Multiple production incidents ([#305](https://github.com/openkruise/rollouts/issues/305)) demonstrate the risk. -2. **Custom `ReadinessGate` to gate pod availability**: A future direction (Plan B) that would allow PDB coexistence. Rejected for the alpha phase because it requires a custom mutating webhook to inject the gate, a separate controller to manage the gate condition, and significant additional testing surface. Tracked as Future Work for beta. +2. **Custom `ReadinessGate` to gate pod availability**: A future direction (Plan B) that would let PDB/disruption-controller visibility participate in rollout gating. Rejected for the alpha phase because it requires a custom mutating webhook to inject the gate, a separate controller to manage the gate condition, and significant additional testing surface. Tracked as Future Work for beta. 3. **Use `paused=true` plus partition annotations** (similar to CloneSet): Rejected because the native Deployment controller does not honor a partition mechanism. Implementing partition-style for native Deployment would require either re-implementing the rolling update loop or relying on `Recreate`, returning to the original problem. @@ -529,6 +602,6 @@ Users opt in by enabling the feature gate on the kruise-rollout controller. 
- [ ] Q2 2026 (GSoC weeks 1–6): MinReadyControl core implementation (Initialize / UpgradeBatch / CalculateBatchContext / Finalize) with unit tests - [ ] Q3 2026 (GSoC weeks 7–8): Webhook invariant enforcement and feature-gated strategy selection - [ ] Q3 2026 (GSoC weeks 9–10): End-to-end tests covering the five core scenarios -- [ ] Q3 2026 (GSoC weeks 11–12): PDB coexistence, maxSurge policy hardening, edge cases, documentation +- [ ] Q3 2026 (GSoC weeks 11–12): PDB coexistence and documentation - [ ] TBD: Observability follow-up (status conditions, events, Prometheus metrics) - [ ] TBD: Plan B (custom `ReadinessGate`) if future requirements need PDB-aware workload availability semantics diff --git a/pkg/controller/batchrelease/batchrelease_controller.go b/pkg/controller/batchrelease/batchrelease_controller.go index 3b93f459..f314cabf 100644 --- a/pkg/controller/batchrelease/batchrelease_controller.go +++ b/pkg/controller/batchrelease/batchrelease_controller.go @@ -160,7 +160,7 @@ type BatchReleaseReconciler struct { // and what is in the Rollout.Spec func (r *BatchReleaseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { release := new(v1beta1.BatchRelease) - err := r.Get(context.TODO(), req.NamespacedName, release) + err := r.Get(ctx, req.NamespacedName, release) if err != nil { if errors.IsNotFound(err) { // Object not found, return. Created objects are automatically garbage collected. @@ -201,7 +201,7 @@ func (r *BatchReleaseReconciler) Reconcile(ctx context.Context, req ctrl.Request // executor start to execute the batch release plan. 
startTimestamp := time.Now() - result, currentStatus, err := r.executor.Do(release) + result, currentStatus, err := r.executor.Do(ctx, release) if err != nil { errList = append(errList, field.InternalError(field.NewPath("do-release"), err)) } diff --git a/pkg/controller/batchrelease/batchrelease_controller_test.go b/pkg/controller/batchrelease/batchrelease_controller_test.go index a29428a9..4147a032 100644 --- a/pkg/controller/batchrelease/batchrelease_controller_test.go +++ b/pkg/controller/batchrelease/batchrelease_controller_test.go @@ -21,6 +21,7 @@ import ( "encoding/json" "fmt" "strconv" + "strings" "testing" "time" @@ -29,6 +30,7 @@ import ( kruiseappsv1alpha1 "github.com/openkruise/kruise-api/apps/v1alpha1" apps "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + policyv1 "k8s.io/api/policy/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" @@ -44,7 +46,9 @@ import ( rolloutapi "github.com/openkruise/rollouts/api" "github.com/openkruise/rollouts/api/v1beta1" + "github.com/openkruise/rollouts/pkg/feature" "github.com/openkruise/rollouts/pkg/util" + utilfeature "github.com/openkruise/rollouts/pkg/util/feature" ) const TIME_LAYOUT = "2006-01-02 15:04:05" @@ -209,6 +213,7 @@ var ( func init() { scheme = runtime.NewScheme() apimachineryruntime.Must(apps.AddToScheme(scheme)) + apimachineryruntime.Must(policyv1.AddToScheme(scheme)) apimachineryruntime.Must(rolloutapi.AddToScheme(scheme)) apimachineryruntime.Must(kruiseappsv1alpha1.AddToScheme(scheme)) @@ -824,6 +829,168 @@ func TestReconcile_Deployment(t *testing.T) { } } +func TestExecutorFallsBackToRecreateWhenMinReadyFeatureGateDisabled(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=false") + release := releaseDeploy.DeepCopy() + release.Spec.ReleasePlan.RollingStyle = v1beta1.PartitionRollingStyle + release.Status.Phase = v1beta1.RolloutPhasePreparing + deployment := 
stableDeploy.DeepCopy() + rec := record.NewFakeRecorder(100) + cli := fake.NewClientBuilder().WithScheme(scheme). + WithObjects(release, deployment). + WithStatusSubresource(&v1beta1.BatchRelease{}). + Build() + + controller, err := NewReleasePlanExecutor(cli, rec).getReleaseController(context.Background(), release, release.Status.DeepCopy()) + if err != nil { + t.Fatalf("getReleaseController failed: %v", err) + } + if err := controller.Initialize(); err != nil { + t.Fatalf("Initialize failed: %v", err) + } + + got := &apps.Deployment{} + if err := cli.Get(context.TODO(), client.ObjectKeyFromObject(deployment), got); err != nil { + t.Fatalf("Get deployment failed: %v", err) + } + if got.Spec.Strategy.Type != apps.RecreateDeploymentStrategyType { + t.Fatalf("strategy.type = %q, want Recreate fallback when feature gate disabled", got.Spec.Strategy.Type) + } +} + +func TestMinReadyControlPlaneRecordsInitializedConditionAndEvent(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + release := minReadyRelease() + deployment := stableDeploy.DeepCopy() + deployment.ResourceVersion = "1" + rec := record.NewFakeRecorder(100) + cli := fake.NewClientBuilder().WithScheme(scheme). + WithObjects(release, deployment). + WithStatusSubresource(&v1beta1.BatchRelease{}). 
+ Build() + status := release.Status.DeepCopy() + controller, err := NewReleasePlanExecutor(cli, rec).getReleaseController(context.Background(), release, status) + if err != nil { + t.Fatalf("getReleaseController failed: %v", err) + } + + if err := controller.Initialize(); err != nil { + t.Fatalf("Initialize failed: %v", err) + } + + assertCondition(t, status, v1beta1.RolloutConditionMinReadyInitialized, corev1.ConditionTrue, "MinReadyInitialized") + assertRecordedEvent(t, rec, "MinReadyInitialized") +} + +func TestMinReadyControlPlaneAllowsPDBCoexistence(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + release := minReadyRelease() + deployment := stableDeploy.DeepCopy() + deployment.ResourceVersion = "1" + deployment.Spec.Template.Labels = map[string]string{"app": "busybox"} + pdb := &policyv1.PodDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{Name: "sample-pdb", Namespace: deployment.Namespace}, + Spec: policyv1.PodDisruptionBudgetSpec{ + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "busybox"}}, + }, + } + rec := record.NewFakeRecorder(100) + cli := fake.NewClientBuilder().WithScheme(scheme). + WithObjects(release, deployment, pdb). + WithStatusSubresource(&v1beta1.BatchRelease{}). + Build() + status := release.Status.DeepCopy() + controller, err := NewReleasePlanExecutor(cli, rec).getReleaseController(context.Background(), release, status) + if err != nil { + t.Fatalf("getReleaseController failed: %v", err) + } + + if err := controller.Initialize(); err != nil { + t.Fatalf("Initialize failed: %v", err) + } + + assertCondition(t, status, v1beta1.RolloutConditionMinReadyInitialized, corev1.ConditionTrue, "MinReadyInitialized") +} + +func BenchmarkRecreateReconcile(b *testing.B) { + release := releaseDeploy.DeepCopy() + deployment := stableDeploy.DeepCopy() + rec := record.NewFakeRecorder(100) + cli := fake.NewClientBuilder().WithScheme(scheme). 
+ WithObjects(release, deployment). + WithStatusSubresource(&v1beta1.BatchRelease{}). + Build() + reconciler := &BatchReleaseReconciler{ + Client: cli, + recorder: rec, + Scheme: scheme, + executor: NewReleasePlanExecutor(cli, rec), + } + req := reconcile.Request{NamespacedName: client.ObjectKeyFromObject(release)} + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = reconciler.Reconcile(context.TODO(), req) + } +} + +func BenchmarkMinReadyReconcile(b *testing.B) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + release := minReadyRelease() + deployment := stableDeploy.DeepCopy() + deployment.ResourceVersion = "1" + rec := record.NewFakeRecorder(100) + cli := fake.NewClientBuilder().WithScheme(scheme). + WithObjects(release, deployment). + WithStatusSubresource(&v1beta1.BatchRelease{}). + Build() + reconciler := &BatchReleaseReconciler{ + Client: cli, + recorder: rec, + Scheme: scheme, + executor: NewReleasePlanExecutor(cli, rec), + } + req := reconcile.Request{NamespacedName: client.ObjectKeyFromObject(release)} + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = reconciler.Reconcile(context.TODO(), req) + } +} + +func minReadyRelease() *v1beta1.BatchRelease { + release := releaseDeploy.DeepCopy() + release.Spec.ReleasePlan.RollingStyle = v1beta1.PartitionRollingStyle + release.Status.Phase = v1beta1.RolloutPhasePreparing + return release +} + +func assertCondition(t *testing.T, status *v1beta1.BatchReleaseStatus, condType v1beta1.RolloutConditionType, condStatus corev1.ConditionStatus, reason string) { + t.Helper() + for _, condition := range status.Conditions { + if condition.Type != condType { + continue + } + if condition.Status != condStatus || condition.Reason != reason { + t.Fatalf("condition %s = %s/%s, want %s/%s", condType, condition.Status, condition.Reason, condStatus, reason) + } + return + } + t.Fatalf("condition %s not found in %#v", condType, status.Conditions) +} + +func 
assertRecordedEvent(t *testing.T, rec *record.FakeRecorder, want string) { + t.Helper() + select { + case event := <-rec.Events: + if !strings.Contains(event, want) { + t.Fatalf("event = %q, want containing %q", event, want) + } + case <-time.After(time.Second): + t.Fatalf("event containing %q not recorded", want) + } +} + func containers(version string) []corev1.Container { return []corev1.Container{ { diff --git a/pkg/controller/batchrelease/batchrelease_executor.go b/pkg/controller/batchrelease/batchrelease_executor.go index ec74e27b..aeb1bdd4 100644 --- a/pkg/controller/batchrelease/batchrelease_executor.go +++ b/pkg/controller/batchrelease/batchrelease_executor.go @@ -17,6 +17,7 @@ limitations under the License. package batchrelease import ( + "context" "fmt" "reflect" "time" @@ -45,8 +46,10 @@ import ( partitiondeployment "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/deployment" "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/nativedaemonset" "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/statefulset" + "github.com/openkruise/rollouts/pkg/feature" "github.com/openkruise/rollouts/pkg/util" "github.com/openkruise/rollouts/pkg/util/errors" + utilfeature "github.com/openkruise/rollouts/pkg/util/feature" ) const ( @@ -68,7 +71,7 @@ func NewReleasePlanExecutor(cli client.Client, recorder record.EventRecorder) *E } // Do execute the release plan -func (r *Executor) Do(release *v1beta1.BatchRelease) (reconcile.Result, *v1beta1.BatchReleaseStatus, error) { +func (r *Executor) Do(ctx context.Context, release *v1beta1.BatchRelease) (reconcile.Result, *v1beta1.BatchReleaseStatus, error) { klog.InfoS("Starting one round of reconciling release plan", "BatchRelease", client.ObjectKeyFromObject(release), "phase", release.Status.Phase, @@ -76,7 +79,7 @@ func (r *Executor) Do(release *v1beta1.BatchRelease) (reconcile.Result, *v1beta1 "current-batch-state", 
release.Status.CanaryStatus.CurrentBatchState) newStatus := getInitializedStatus(&release.Status) - workloadController, err := r.getReleaseController(release, newStatus) + workloadController, err := r.getReleaseController(ctx, release, newStatus) if err != nil || workloadController == nil { return reconcile.Result{}, nil, nil } @@ -194,7 +197,7 @@ func (r *Executor) progressBatches(release *v1beta1.BatchRelease, newStatus *v1b } // GetWorkloadController pick the right workload controller to work on the workload -func (r *Executor) getReleaseController(release *v1beta1.BatchRelease, newStatus *v1beta1.BatchReleaseStatus) (control.Interface, error) { +func (r *Executor) getReleaseController(ctx context.Context, release *v1beta1.BatchRelease, newStatus *v1beta1.BatchReleaseStatus) (control.Interface, error) { targetRef := release.Spec.WorkloadRef gvk := schema.FromAPIVersionAndKind(targetRef.APIVersion, targetRef.Kind) if !util.IsSupportedWorkload(gvk) { @@ -233,26 +236,50 @@ func (r *Executor) getReleaseController(release *v1beta1.BatchRelease, newStatus case v1beta1.PartitionRollingStyle, "": if targetRef.APIVersion == appsv1alpha1.GroupVersion.String() && targetRef.Kind == reflect.TypeOf(appsv1alpha1.DaemonSet{}).Name() { klog.InfoS("Using DaemonSet partition-style release controller for this batch release", "workload name", targetKey.Name, "namespace", targetKey.Namespace) - return partitionstyle.NewControlPlane(daemonset.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil + return partitionstyle.NewControlPlane(ctx, daemonset.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil } if targetRef.APIVersion == apps.SchemeGroupVersion.String() && targetRef.Kind == reflect.TypeOf(apps.DaemonSet{}).Name() { klog.InfoS("Using Native DaemonSet partition-style release controller for this batch release", "workload name", targetKey.Name, "namespace", targetKey.Namespace) - return 
partitionstyle.NewControlPlane(nativedaemonset.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil + return partitionstyle.NewControlPlane(ctx, nativedaemonset.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil } if targetRef.APIVersion == appsv1alpha1.GroupVersion.String() && targetRef.Kind == reflect.TypeOf(appsv1alpha1.CloneSet{}).Name() { klog.InfoS("Using CloneSet partition-style release controller for this batch release", "workload name", targetKey.Name, "namespace", targetKey.Namespace) - return partitionstyle.NewControlPlane(cloneset.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil + return partitionstyle.NewControlPlane(ctx, cloneset.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil } if targetRef.APIVersion == apps.SchemeGroupVersion.String() && targetRef.Kind == reflect.TypeOf(apps.Deployment{}).Name() { + // Route to the MinReady controller when the feature gate is enabled, or + // when the Deployment still carries MinReady original-strategy annotations. + // The latter covers the gate being turned off mid-rollout: the old + // Recreate-mode controller would not recognize an inflated RollingUpdate + // Deployment as under its control, leaving the workload stuck in a + // half-initialized state. Keeping MinReady control lets it finalize and + // restore the original fields. 
+ if utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) || r.deploymentHasMinReadyAnnotations(ctx, targetKey) { + klog.InfoS("Using Deployment MinReadySeconds partition-style release controller for this batch release", "workload name", targetKey.Name, "namespace", targetKey.Namespace) + return partitionstyle.NewControlPlane(ctx, partitiondeployment.NewMinReadyController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil + } klog.InfoS("Using Deployment partition-style release controller for this batch release", "workload name", targetKey.Name, "namespace", targetKey.Namespace) - return partitionstyle.NewControlPlane(partitiondeployment.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil + return partitionstyle.NewControlPlane(ctx, partitiondeployment.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil } klog.Info("Partition, but use StatefulSet-Like partition-style release controller for this batch release") } // try to use StatefulSet-like rollout controller by default klog.InfoS("Using StatefulSet-Like partition-style release controller for this batch release", "workload name", targetKey.Name, "namespace", targetKey.Namespace) - return partitionstyle.NewControlPlane(statefulset.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil + return partitionstyle.NewControlPlane(ctx, statefulset.NewController, r.client, r.recorder, release, newStatus, targetKey, gvk), nil +} + +// deploymentHasMinReadyAnnotations reports whether the target Deployment still +// carries MinReady original-strategy annotations, i.e. it was initialized by the +// MinReady controller and not yet finalized. Used to keep MinReady routing when +// the feature gate is disabled mid-rollout. A fetch failure (e.g. NotFound) +// returns false so routing falls back to the default controller. 
+func (r *Executor) deploymentHasMinReadyAnnotations(ctx context.Context, key types.NamespacedName) bool { + deployment := &apps.Deployment{} + if err := r.client.Get(ctx, key, deployment); err != nil { + return false + } + return v1beta1.HasMinReadyOriginalAnnotations(deployment.Annotations) } func (r *Executor) moveToNextBatch(release *v1beta1.BatchRelease, status *v1beta1.BatchReleaseStatus) { diff --git a/pkg/controller/batchrelease/context/context.go b/pkg/controller/batchrelease/context/context.go index 2078c2d3..e34d256c 100644 --- a/pkg/controller/batchrelease/context/context.go +++ b/pkg/controller/batchrelease/context/context.go @@ -97,9 +97,12 @@ func (bc *BatchContext) IsBatchReady() error { // batchLabelSatisfied return true if the expected batch label has been patched func batchLabelSatisfied(pods []*corev1.Pod, rolloutID string, targetCount int32) bool { - if rolloutID == "" || len(pods) == 0 { + if rolloutID == "" || targetCount <= 0 { return true } + if len(pods) == 0 { + return false + } patchedCount := util.WrappedPodCount(pods, func(pod *corev1.Pod) bool { if !pod.DeletionTimestamp.IsZero() { return false diff --git a/pkg/controller/batchrelease/context/context_test.go b/pkg/controller/batchrelease/context/context_test.go index 3d3941cd..704618e9 100644 --- a/pkg/controller/batchrelease/context/context_test.go +++ b/pkg/controller/batchrelease/context/context_test.go @@ -125,6 +125,15 @@ func TestIsBatchReady(t *testing.T) { updatedReady: 5, isReady: false, }, + "false: rollout-id, no pods listed": { + release: r(p(intstr.FromInt(1)), "1", "version-1"), + pods: nil, + labelDesired: 5, + desired: 5, + updated: 5, + updatedReady: 5, + isReady: false, + }, } for name, cs := range cases { @@ -181,3 +190,63 @@ func generatePodsWith(labels map[string]string, replicas int, beginOrder int) [] } return pods } + +// TestBatchLabelSatisfied is a regression matrix for the shared batchLabelSatisfied +// helper (P0-1). 
It is on the hot path for every partition-style control plane +// (CloneSet, StatefulSet, Advanced DaemonSet, Advanced Deployment, MinReady), so +// the empty-pod-list semantics must hold regardless of feature gate. The key +// change being locked down: rolloutID set AND targetCount > 0 AND no pods listed +// must return false (the batch label cannot be satisfied by zero pods), instead +// of the previous true. +func TestBatchLabelSatisfied(t *testing.T) { + labeledPods := generatePodsWith(map[string]string{ + v1beta1.RolloutIDLabel: "rollout-1", + }, 3, 0) + + cases := map[string]struct { + pods []*corev1.Pod + rolloutID string + targetCount int32 + want bool + }{ + "empty rolloutID short-circuits to true, no pods": { + pods: nil, rolloutID: "", targetCount: 5, want: true, + }, + "empty rolloutID short-circuits to true, with pods": { + pods: labeledPods, rolloutID: "", targetCount: 5, want: true, + }, + "targetCount zero short-circuits to true, no pods": { + pods: nil, rolloutID: "rollout-1", targetCount: 0, want: true, + }, + "targetCount negative short-circuits to true": { + pods: nil, rolloutID: "rollout-1", targetCount: -1, want: true, + }, + "rolloutID set, target>0, no pods listed -> false (P0-1 core change)": { + pods: nil, rolloutID: "rollout-1", targetCount: 3, want: false, + }, + "rolloutID set, target>0, empty (non-nil) pod slice -> false": { + pods: []*corev1.Pod{}, rolloutID: "rollout-1", targetCount: 3, want: false, + }, + "rolloutID set, enough labeled pods -> true": { + pods: labeledPods, rolloutID: "rollout-1", targetCount: 3, want: true, + }, + "rolloutID set, not enough labeled pods -> false": { + pods: labeledPods, rolloutID: "rollout-1", targetCount: 4, want: false, + }, + "rolloutID set, pods labeled with a different id -> false": { + pods: generatePodsWith(map[string]string{v1beta1.RolloutIDLabel: "rollout-2"}, 3, 0), + rolloutID: "rollout-1", + targetCount: 1, + want: false, + }, + } + + for name, cs := range cases { + t.Run(name, func(t 
*testing.T) { + if got := batchLabelSatisfied(cs.pods, cs.rolloutID, cs.targetCount); got != cs.want { + t.Fatalf("batchLabelSatisfied(pods=%d, id=%q, target=%d) = %v, want %v", + len(cs.pods), cs.rolloutID, cs.targetCount, got, cs.want) + } + }) + } +} diff --git a/pkg/controller/batchrelease/control/partitionstyle/cloneset/control.go b/pkg/controller/batchrelease/control/partitionstyle/cloneset/control.go index aef389fa..ce09280d 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/cloneset/control.go +++ b/pkg/controller/batchrelease/control/partitionstyle/cloneset/control.go @@ -78,7 +78,7 @@ func (rc *realController) ListOwnedPods() ([]*corev1.Pod, error) { return rc.pods, err } -func (rc *realController) Initialize(release *v1beta1.BatchRelease) error { +func (rc *realController) Initialize(ctx context.Context, release *v1beta1.BatchRelease) error { if control.IsControlledByBatchRelease(release, rc.object) { return nil } @@ -87,21 +87,21 @@ func (rc *realController) Initialize(release *v1beta1.BatchRelease) error { owner := control.BuildReleaseControlInfo(release) body := fmt.Sprintf(`{"metadata":{"annotations":{"%s":"%s"}},"spec":{"updateStrategy":{"paused":%v,"partition":"%s"}}}`, util.BatchReleaseControlAnnotation, owner, false, "100%") - return rc.client.Patch(context.TODO(), clone, client.RawPatch(types.MergePatchType, []byte(body))) + return rc.client.Patch(ctx, clone, client.RawPatch(types.MergePatchType, []byte(body))) } -func (rc *realController) UpgradeBatch(ctx *batchcontext.BatchContext) error { +func (rc *realController) UpgradeBatch(ctx context.Context, batchContext *batchcontext.BatchContext) error { var body string var desired int - switch partition := ctx.DesiredPartition; partition.Type { + switch partition := batchContext.DesiredPartition; partition.Type { case intstr.Int: desired = int(partition.IntVal) body = fmt.Sprintf(`{"spec":{"updateStrategy":{"partition": %d }}}`, partition.IntValue()) case intstr.String: - desired, _ = 
intstr.GetScaledValueFromIntOrPercent(&partition, int(ctx.Replicas), true) + desired, _ = intstr.GetScaledValueFromIntOrPercent(&partition, int(batchContext.Replicas), true) body = fmt.Sprintf(`{"spec":{"updateStrategy":{"partition":"%s"}}}`, partition.String()) } - current, _ := intstr.GetScaledValueFromIntOrPercent(&ctx.CurrentPartition, int(ctx.Replicas), true) + current, _ := intstr.GetScaledValueFromIntOrPercent(&batchContext.CurrentPartition, int(batchContext.Replicas), true) // current less than desired, which means current revision replicas will be less than desired, // in other word, update revision replicas will be more than desired, no need to update again. @@ -110,10 +110,10 @@ func (rc *realController) UpgradeBatch(ctx *batchcontext.BatchContext) error { } clone := util.GetEmptyObjectWithKey(rc.object) - return rc.client.Patch(context.TODO(), clone, client.RawPatch(types.MergePatchType, []byte(body))) + return rc.client.Patch(ctx, clone, client.RawPatch(types.MergePatchType, []byte(body))) } -func (rc *realController) Finalize(release *v1beta1.BatchRelease) error { +func (rc *realController) Finalize(ctx context.Context, release *v1beta1.BatchRelease) error { if rc.object == nil { return nil } @@ -134,7 +134,7 @@ func (rc *realController) Finalize(release *v1beta1.BatchRelease) error { body := fmt.Sprintf(`{"metadata":{"annotations":{"%s":null}}%s}`, util.BatchReleaseControlAnnotation, specBody) clone := util.GetEmptyObjectWithKey(rc.object) - return rc.client.Patch(context.TODO(), clone, client.RawPatch(types.MergePatchType, []byte(body))) + return rc.client.Patch(ctx, clone, client.RawPatch(types.MergePatchType, []byte(body))) } func (rc *realController) CalculateBatchContext(release *v1beta1.BatchRelease) (*batchcontext.BatchContext, error) { diff --git a/pkg/controller/batchrelease/control/partitionstyle/cloneset/control_test.go b/pkg/controller/batchrelease/control/partitionstyle/cloneset/control_test.go index 28a4dce7..17968159 100644 --- 
a/pkg/controller/batchrelease/control/partitionstyle/cloneset/control_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/cloneset/control_test.go @@ -292,7 +292,7 @@ func TestRealController(t *testing.T) { controller, err := c.BuildController() Expect(err).NotTo(HaveOccurred()) - err = controller.Initialize(release) + err = controller.Initialize(context.Background(), release) Expect(err).NotTo(HaveOccurred()) fetch := &kruiseappsv1alpha1.CloneSet{} Expect(cli.Get(context.TODO(), cloneKey, fetch)).NotTo(HaveOccurred()) @@ -303,7 +303,7 @@ func TestRealController(t *testing.T) { for { batchContext, err := controller.CalculateBatchContext(release) Expect(err).NotTo(HaveOccurred()) - err = controller.UpgradeBatch(batchContext) + err = controller.UpgradeBatch(context.Background(), batchContext) fetch = &kruiseappsv1alpha1.CloneSet{} // mock Expect(cli.Get(context.TODO(), cloneKey, fetch)).NotTo(HaveOccurred()) @@ -316,7 +316,7 @@ func TestRealController(t *testing.T) { Expect(cli.Get(context.TODO(), cloneKey, fetch)).NotTo(HaveOccurred()) Expect(fetch.Spec.UpdateStrategy.Partition.StrVal).Should(Equal("90%")) - err = controller.Finalize(release) + err = controller.Finalize(context.Background(), release) Expect(err).NotTo(HaveOccurred()) fetch = &kruiseappsv1alpha1.CloneSet{} Expect(cli.Get(context.TODO(), cloneKey, fetch)).NotTo(HaveOccurred()) @@ -445,7 +445,7 @@ func TestFinalize(t *testing.T) { t.Fatalf("BuildController failed: %s", err.Error()) } cs.featureGateFunc() - err = c.Finalize(br) + err = c.Finalize(context.Background(), br) if err != nil { t.Fatalf("BuildController failed: %s", err.Error()) } diff --git a/pkg/controller/batchrelease/control/partitionstyle/control_plane.go b/pkg/controller/batchrelease/control/partitionstyle/control_plane.go index ff82e2f4..e7f9a9f6 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/control_plane.go +++ b/pkg/controller/batchrelease/control/partitionstyle/control_plane.go @@ -41,6 +41,7 @@ type 
realBatchControlPlane struct { client.Client record.EventRecorder patcher labelpatch.LabelPatcher + ctx context.Context release *v1beta1.BatchRelease newStatus *v1beta1.BatchReleaseStatus } @@ -48,28 +49,65 @@ type realBatchControlPlane struct { type NewInterfaceFunc func(cli client.Client, key types.NamespacedName, gvk schema.GroupVersionKind) Interface // NewControlPlane creates a new release controller with partitioned-style to drive batch release state machine -func NewControlPlane(f NewInterfaceFunc, cli client.Client, recorder record.EventRecorder, release *v1beta1.BatchRelease, newStatus *v1beta1.BatchReleaseStatus, key types.NamespacedName, gvk schema.GroupVersionKind) *realBatchControlPlane { +func NewControlPlane(ctx context.Context, f NewInterfaceFunc, cli client.Client, recorder record.EventRecorder, release *v1beta1.BatchRelease, newStatus *v1beta1.BatchReleaseStatus, key types.NamespacedName, gvk schema.GroupVersionKind) *realBatchControlPlane { return &realBatchControlPlane{ Client: cli, EventRecorder: recorder, newStatus: newStatus, Interface: f(cli, key, gvk), + ctx: nonNilContext(ctx), release: release.DeepCopy(), patcher: labelpatch.NewLabelPatcher(cli, klog.KObj(release), release.Spec.ReleasePlan.Batches), } } -func (rc *realBatchControlPlane) Initialize() error { - controller, err := rc.BuildController() +func nonNilContext(ctx context.Context) context.Context { + if ctx != nil { + return ctx + } + return context.Background() +} + +func (rc *realBatchControlPlane) bindMinReadyStatus(controller Interface) { + if binder, ok := controller.(MinReadyStatusBinder); ok { + binder.BindMinReadyStatus(rc.release, rc.newStatus, rc.EventRecorder) + } +} + +func (rc *realBatchControlPlane) reportOperationFailed(controller Interface, reason string, err error) { + if err == nil { + return + } + if lifecycle, ok := controller.(MinReadyLifecycle); ok { + lifecycle.RecordOperationFailed(reason, err) + return + } + klog.ErrorS(err, "Partition-style control plane 
failed", "release", klog.KObj(rc.release), "reason", reason) +} + +func (rc *realBatchControlPlane) Initialize() (err error) { + controller := rc.Interface + var reportErr error + defer func() { + rc.reportOperationFailed(controller, "MinReadyInitializeFailed", reportErr) + }() + + controller, err = rc.BuildController() if err != nil { + reportErr = err return err } + rc.bindMinReadyStatus(controller) // claim workload under our control - err = controller.Initialize(rc.release) + err = controller.Initialize(rc.ctx, rc.release) if err != nil { + reportErr = err return err } + if lifecycle, ok := controller.(MinReadyLifecycle); ok { + lifecycle.RecordInitialized() + } // record revision and replicas workloadInfo := controller.GetWorkloadInfo() @@ -85,43 +123,75 @@ func (rc *realBatchControlPlane) Initialize() error { return err } -func (rc *realBatchControlPlane) UpgradeBatch() error { - controller, err := rc.BuildController() +func (rc *realBatchControlPlane) UpgradeBatch() (err error) { + controller := rc.Interface + var reportErr error + defer func() { + rc.reportOperationFailed(controller, "MinReadyBatchingFailed", reportErr) + }() + + controller, err = rc.BuildController() if err != nil { + reportErr = err return err } + rc.bindMinReadyStatus(controller) if controller.GetWorkloadInfo().Replicas == 0 { + if lifecycle, ok := controller.(MinReadyLifecycle); ok { + lifecycle.RecordZeroReplicaBatching() + } return nil } err = rc.countAndUpdateNoNeedUpdateReplicas() if err != nil { + reportErr = err return err } batchContext, err := controller.CalculateBatchContext(rc.release) if err != nil { + reportErr = err return err } klog.Infof("BatchRelease %v calculated context when upgrade batch: %s", klog.KObj(rc.release), batchContext.Log()) - err = controller.UpgradeBatch(batchContext) + err = controller.UpgradeBatch(rc.ctx, batchContext) if err != nil { + reportErr = err return err } - return rc.patcher.PatchPodBatchLabel(batchContext) + if err := 
rc.patcher.PatchPodBatchLabel(batchContext); err != nil { + reportErr = err + return err + } + if lifecycle, ok := controller.(MinReadyLifecycle); ok { + lifecycle.RecordBatchAdvanced() + } + return nil } -func (rc *realBatchControlPlane) EnsureBatchPodsReadyAndLabeled() error { - controller, err := rc.BuildController() +func (rc *realBatchControlPlane) EnsureBatchPodsReadyAndLabeled() (err error) { + controller := rc.Interface + var reportErr error + defer func() { + rc.reportOperationFailed(controller, "MinReadyBatchingFailed", reportErr) + }() + + controller, err = rc.BuildController() if err != nil { + reportErr = err return err } + rc.bindMinReadyStatus(controller) if controller.GetWorkloadInfo().Replicas == 0 { + if lifecycle, ok := controller.(MinReadyLifecycle); ok { + lifecycle.RecordZeroReplicaBatchReady() + } return nil } @@ -129,23 +199,58 @@ func (rc *realBatchControlPlane) EnsureBatchPodsReadyAndLabeled() error { // the target calculated should be consistent with UpgradeBatch. 
batchContext, err := controller.CalculateBatchContext(rc.release) if err != nil { + reportErr = err return err } klog.Infof("BatchRelease %v calculated context when check batch ready: %s", klog.KObj(rc.release), batchContext.Log()) - return batchContext.IsBatchReady() + if reconciler, ok := controller.(MinReadyDriftReconciler); ok { + if err := reconciler.ReconcileMaxUnavailableDrift(rc.ctx, batchContext); err != nil { + reportErr = err + return err + } + } + + if err := batchContext.IsBatchReady(); err != nil { + if lifecycle, ok := controller.(MinReadyLifecycle); ok { + lifecycle.ObserveBatchWait() + } + return err + } + if lifecycle, ok := controller.(MinReadyLifecycle); ok { + lifecycle.RecordBatchReady() + } + return nil } -func (rc *realBatchControlPlane) Finalize() error { - controller, err := rc.BuildController() +func (rc *realBatchControlPlane) Finalize() (err error) { + controller := rc.Interface + var reportErr error + defer func() { + rc.reportOperationFailed(controller, "MinReadyFinalizeFailed", reportErr) + }() + + controller, err = rc.BuildController() if err != nil { - return client.IgnoreNotFound(err) + if err := client.IgnoreNotFound(err); err != nil { + reportErr = err + return err + } + return nil } + rc.bindMinReadyStatus(controller) // release workload control info and clean up resources if it needs - return controller.Finalize(rc.release) + if err := controller.Finalize(rc.ctx, rc.release); err != nil { + reportErr = err + return err + } + if lifecycle, ok := controller.(MinReadyLifecycle); ok { + lifecycle.RecordFinalized() + } + return nil } func (rc *realBatchControlPlane) SyncWorkloadInformation() (control.WorkloadEventType, *util.WorkloadInfo, error) { @@ -244,7 +349,7 @@ func (rc *realBatchControlPlane) markNoNeedUpdatePodsIfNeeds() (*int32, error) { for _, pod := range filterPods { clone := util.GetEmptyObjectWithKey(pod) body := fmt.Sprintf(`{"metadata":{"labels":{"%s":"%s"}}}`, util.NoNeedUpdatePodLabel, rolloutID) - err = 
rc.Patch(context.TODO(), clone, client.RawPatch(types.StrategicMergePatchType, []byte(body))) + err = rc.Patch(rc.ctx, clone, client.RawPatch(types.StrategicMergePatchType, []byte(body))) if err != nil { klog.Errorf("Failed to patch no-need-update label(%v) to pod %v, err: %v", rolloutID, klog.KObj(pod), err) return &noNeedUpdateReplicas, err diff --git a/pkg/controller/batchrelease/control/partitionstyle/control_plane_test.go b/pkg/controller/batchrelease/control/partitionstyle/control_plane_test.go new file mode 100644 index 00000000..b81219b0 --- /dev/null +++ b/pkg/controller/batchrelease/control/partitionstyle/control_plane_test.go @@ -0,0 +1,654 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package partitionstyle + +import ( + "context" + "errors" + "testing" + + apps "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/tools/record" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + "github.com/openkruise/rollouts/api/v1alpha1" + "github.com/openkruise/rollouts/api/v1beta1" + batchcontext "github.com/openkruise/rollouts/pkg/controller/batchrelease/context" + controlpkg "github.com/openkruise/rollouts/pkg/controller/batchrelease/control" + "github.com/openkruise/rollouts/pkg/util" +) + +type fakePartitionController struct { + buildResult Interface + buildErr error + minReady bool + + workloadInfo *util.WorkloadInfo + pods []*corev1.Pod + listErr error + batchCtx *batchcontext.BatchContext + calcErr error + + initErr error + upgradeErr error + finalizeErr error + reconcileErr error + + buildCalls int + initCalls int + upgradeCalls int + finalizeCalls int + reconcileCalls int + calculateCalls int + listCalls int + + statusWriter *MinReadyStatusWriter +} + +func (f *fakePartitionController) BuildController() (Interface, error) { + f.buildCalls++ + if f.buildErr != nil { + return nil, f.buildErr + } + if f.buildResult != nil { + return f.buildResult, nil + } + return f, nil +} + +func (f *fakePartitionController) BindMinReadyStatus(release *v1beta1.BatchRelease, status *v1beta1.BatchReleaseStatus, recorder record.EventRecorder) { + if f.minReady { + f.statusWriter = NewMinReadyStatusWriter(release, status, recorder) + } +} + +func (f *fakePartitionController) RecordOperationFailed(reason string, err error) { + if f.statusWriter != nil { + f.statusWriter.RecordDegraded(reason, err) + } +} + +func (f *fakePartitionController) RecordZeroReplicaBatching() { + if 
f.statusWriter != nil { + f.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatching", "MinReadySeconds strategy has no replicas to upgrade") + } +} + +func (f *fakePartitionController) RecordBatchAdvanced() { + if f.statusWriter != nil { + f.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatching", "MinReadySeconds strategy advanced the current batch") + } +} + +func (f *fakePartitionController) RecordZeroReplicaBatchReady() { + if f.statusWriter != nil { + f.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatchReady", "MinReadySeconds strategy batch is ready") + } +} + +func (f *fakePartitionController) RecordBatchReady() { + if f.statusWriter != nil { + f.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatchReady", "MinReadySeconds strategy batch is ready") + } +} + +func (f *fakePartitionController) RecordInitialized() { + if f.statusWriter != nil { + f.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyInitialized, "MinReadyInitialized", "MinReadySeconds strategy initialized") + } +} + +func (f *fakePartitionController) RecordFinalized() { + if f.statusWriter != nil { + f.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyFinalized, "MinReadyFinalized", "MinReadySeconds strategy finalized") + } +} + +func (f *fakePartitionController) ObserveBatchWait() { + if f.statusWriter == nil { + return + } + status := f.statusWriter.BatchReleaseStatus() + if status == nil { + return + } + condition := util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyBatching) + ObserveMinReadyBatchWait(f.statusWriter.BatchRelease(), condition) +} + +func (f *fakePartitionController) GetWorkloadInfo() *util.WorkloadInfo { + if f.workloadInfo != nil { + return f.workloadInfo + } + return testWorkloadInfo(3, 1, "stable", "update") +} + +func (f *fakePartitionController) ListOwnedPods() ([]*corev1.Pod, error) { + f.listCalls++ + return 
f.pods, f.listErr +} + +func (f *fakePartitionController) CalculateBatchContext(release *v1beta1.BatchRelease) (*batchcontext.BatchContext, error) { + f.calculateCalls++ + if f.calcErr != nil { + return nil, f.calcErr + } + if f.batchCtx != nil { + return f.batchCtx, nil + } + return readyBatchContext(), nil +} + +func (f *fakePartitionController) Initialize(ctx context.Context, release *v1beta1.BatchRelease) error { + f.initCalls++ + return f.initErr +} + +func (f *fakePartitionController) UpgradeBatch(context.Context, *batchcontext.BatchContext) error { + f.upgradeCalls++ + return f.upgradeErr +} + +func (f *fakePartitionController) ReconcileMaxUnavailableDrift(context.Context, *batchcontext.BatchContext) error { + f.reconcileCalls++ + return f.reconcileErr +} + +func (f *fakePartitionController) Finalize(context.Context, *v1beta1.BatchRelease) error { + f.finalizeCalls++ + return f.finalizeErr +} + +func (f *fakePartitionController) IsMinReadyControl() bool { + return f.minReady +} + +type fakeBatchLabelPatcher struct { + calls int + err error +} + +func (f *fakeBatchLabelPatcher) PatchPodBatchLabel(*batchcontext.BatchContext) error { + f.calls++ + return f.err +} + +func TestNewControlPlaneCopiesReleaseAndNormalizesContext(t *testing.T) { + release := testBatchRelease() + status := &v1beta1.BatchReleaseStatus{} + controller := &fakePartitionController{} + rc := NewControlPlane(nil, func(client.Client, types.NamespacedName, schema.GroupVersionKind) Interface { + return controller + }, fake.NewClientBuilder().Build(), record.NewFakeRecorder(10), release, status, types.NamespacedName{}, schema.GroupVersionKind{}) + + if rc.ctx == nil { + t.Fatalf("ctx is nil, want background context") + } + if rc.release == release { + t.Fatalf("release was not deep-copied") + } + release.Name = "changed" + if rc.release.Name == "changed" { + t.Fatalf("release mutation leaked into control plane copy") + } + + ctx := context.WithValue(context.Background(), struct{}{}, "value") + if 
nonNilContext(ctx) != ctx { + t.Fatalf("nonNilContext did not preserve non-nil context") + } +} + +func TestControlPlaneInitializeRecordsMinReadyWorkloadInfo(t *testing.T) { + controller := &fakePartitionController{ + minReady: true, + workloadInfo: testWorkloadInfo(5, 2, "stable", "update"), + } + status := &v1beta1.BatchReleaseStatus{} + rc := newTestControlPlane(controller, status) + + if err := rc.Initialize(); err != nil { + t.Fatalf("Initialize() error = %v", err) + } + if controller.initCalls != 1 { + t.Fatalf("initCalls = %d, want 1", controller.initCalls) + } + if status.StableRevision != "stable" || status.UpdateRevision != "update" || status.ObservedWorkloadReplicas != 5 { + t.Fatalf("status revisions/replicas not updated: %#v", status) + } + condition := util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyInitialized) + if condition == nil || condition.Reason != "MinReadyInitialized" { + t.Fatalf("MinReadyInitialized condition = %#v", condition) + } +} + +func TestControlPlaneUpgradeBatchMinReadyPaths(t *testing.T) { + t.Run("no replicas records ready without upgrading", func(t *testing.T) { + controller := &fakePartitionController{ + minReady: true, + workloadInfo: testWorkloadInfo(0, 0, "stable", "update"), + } + status := &v1beta1.BatchReleaseStatus{} + rc := newTestControlPlane(controller, status) + + if err := rc.UpgradeBatch(); err != nil { + t.Fatalf("UpgradeBatch() error = %v", err) + } + if controller.upgradeCalls != 0 { + t.Fatalf("upgradeCalls = %d, want 0", controller.upgradeCalls) + } + condition := util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyBatching) + if condition == nil || condition.Reason != "MinReadyBatching" { + t.Fatalf("MinReadyBatching condition = %#v", condition) + } + }) + + t.Run("successful upgrade patches labels and records normal condition", func(t *testing.T) { + controller := &fakePartitionController{minReady: true} + patcher := &fakeBatchLabelPatcher{} + status := 
&v1beta1.BatchReleaseStatus{} + rc := newTestControlPlane(controller, status) + rc.patcher = patcher + + if err := rc.UpgradeBatch(); err != nil { + t.Fatalf("UpgradeBatch() error = %v", err) + } + if controller.calculateCalls != 1 || controller.upgradeCalls != 1 || patcher.calls != 1 { + t.Fatalf("calls calculate=%d upgrade=%d patch=%d, want 1/1/1", controller.calculateCalls, controller.upgradeCalls, patcher.calls) + } + condition := util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyBatching) + if condition == nil || condition.Reason != "MinReadyBatching" { + t.Fatalf("MinReadyBatching condition = %#v", condition) + } + }) + + t.Run("calculate error records degraded condition", func(t *testing.T) { + controller := &fakePartitionController{ + minReady: true, + calcErr: errors.Join(errors.New("strategy drift"), ErrMinReadyDriftDetected), + } + status := &v1beta1.BatchReleaseStatus{} + rc := newTestControlPlane(controller, status) + + if err := rc.UpgradeBatch(); err == nil { + t.Fatalf("UpgradeBatch() error = nil, want error") + } + condition := util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyDegraded) + if condition == nil || condition.Reason != "MinReadyDegradedDriftDetected" { + t.Fatalf("MinReadyDegraded condition = %#v", condition) + } + if status.Message == "" { + t.Fatalf("status.Message is empty, want degraded error") + } + }) +} + +func TestControlPlaneEnsureBatchPodsReadyAndLabeled(t *testing.T) { + t.Run("not ready returns readiness error", func(t *testing.T) { + controller := &fakePartitionController{ + minReady: true, + batchCtx: &batchcontext.BatchContext{ + Replicas: 3, + DesiredUpdatedReplicas: 3, + UpdatedReplicas: 1, + }, + } + now := metav1.Now() + status := &v1beta1.BatchReleaseStatus{ + Conditions: []v1beta1.RolloutCondition{{ + Type: v1beta1.RolloutConditionMinReadyBatching, + Status: corev1.ConditionTrue, + LastTransitionTime: now, + }}, + } + rc := newTestControlPlane(controller, status) + + if err 
:= rc.EnsureBatchPodsReadyAndLabeled(); err == nil { + t.Fatalf("EnsureBatchPodsReadyAndLabeled() error = nil, want not ready error") + } + if controller.calculateCalls != 1 { + t.Fatalf("calculateCalls = %d, want 1", controller.calculateCalls) + } + if controller.reconcileCalls != 1 { + t.Fatalf("reconcileCalls = %d, want 1", controller.reconcileCalls) + } + if degraded := util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyDegraded); degraded != nil { + t.Fatalf("MinReadyDegraded condition = %#v, want nil for normal batch wait", degraded) + } + }) + + t.Run("ready records batch ready", func(t *testing.T) { + controller := &fakePartitionController{minReady: true} + status := &v1beta1.BatchReleaseStatus{} + rc := newTestControlPlane(controller, status) + + if err := rc.EnsureBatchPodsReadyAndLabeled(); err != nil { + t.Fatalf("EnsureBatchPodsReadyAndLabeled() error = %v", err) + } + condition := util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyBatching) + if condition == nil || condition.Reason != "MinReadyBatchReady" { + t.Fatalf("MinReadyBatchReady condition = %#v", condition) + } + }) + + t.Run("drift reconcile error records degraded condition", func(t *testing.T) { + controller := &fakePartitionController{ + minReady: true, + reconcileErr: errors.Join(errors.New("window drift"), ErrMinReadyDriftDetected), + } + status := &v1beta1.BatchReleaseStatus{} + rc := newTestControlPlane(controller, status) + + if err := rc.EnsureBatchPodsReadyAndLabeled(); err == nil { + t.Fatalf("EnsureBatchPodsReadyAndLabeled() error = nil, want drift error") + } + if controller.reconcileCalls != 1 { + t.Fatalf("reconcileCalls = %d, want 1", controller.reconcileCalls) + } + condition := util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyDegraded) + if condition == nil || condition.Reason != "MinReadyDegradedDriftDetected" { + t.Fatalf("MinReadyDegraded condition = %#v", condition) + } + }) +} + +func 
TestControlPlaneFinalizeMinReadyPaths(t *testing.T) { + t.Run("not found is ignored", func(t *testing.T) { + controller := &fakePartitionController{ + buildErr: apierrors.NewNotFound(schema.GroupResource{Group: "apps", Resource: "deployments"}, "missing"), + } + rc := newTestControlPlane(controller, &v1beta1.BatchReleaseStatus{}) + + if err := rc.Finalize(); err != nil { + t.Fatalf("Finalize() error = %v", err) + } + }) + + t.Run("successful minReady finalize clears degraded condition", func(t *testing.T) { + controller := &fakePartitionController{minReady: true} + status := &v1beta1.BatchReleaseStatus{ + Message: "previous degraded", + Conditions: []v1beta1.RolloutCondition{{ + Type: v1beta1.RolloutConditionMinReadyDegraded, + Status: corev1.ConditionTrue, + Reason: "MinReadyDegradedDriftDetected", + }}, + } + rc := newTestControlPlane(controller, status) + + if err := rc.Finalize(); err != nil { + t.Fatalf("Finalize() error = %v", err) + } + if controller.finalizeCalls != 1 { + t.Fatalf("finalizeCalls = %d, want 1", controller.finalizeCalls) + } + finalized := util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyFinalized) + if finalized == nil || finalized.Reason != "MinReadyFinalized" { + t.Fatalf("MinReadyFinalized condition = %#v", finalized) + } + degraded := util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyDegraded) + if degraded == nil || degraded.Status != corev1.ConditionFalse { + t.Fatalf("MinReadyDegraded condition = %#v, want false", degraded) + } + if status.Message != "" { + t.Fatalf("status.Message = %q, want empty", status.Message) + } + }) +} + +func TestControlPlaneSyncWorkloadInformationStates(t *testing.T) { + tests := []struct { + name string + release func() *v1beta1.BatchRelease + controller *fakePartitionController + wantEvent controlpkg.WorkloadEventType + wantErr bool + }{ + { + name: "deleted release is ignored", + release: func() *v1beta1.BatchRelease { + release := testBatchRelease() + now := 
metav1.Now() + release.DeletionTimestamp = &now + return release + }, + controller: &fakePartitionController{}, + wantEvent: controlpkg.WorkloadNormalState, + }, + { + name: "workload gone", + controller: &fakePartitionController{ + buildErr: apierrors.NewNotFound(schema.GroupResource{Group: "apps", Resource: "deployments"}, "missing"), + }, + wantEvent: controlpkg.WorkloadHasGone, + wantErr: true, + }, + { + name: "build error", + controller: &fakePartitionController{buildErr: errors.New("build failed")}, + wantEvent: controlpkg.WorkloadUnknownState, + wantErr: true, + }, + { + name: "still reconciling", + controller: &fakePartitionController{workloadInfo: &util.WorkloadInfo{ + LogKey: "workload", + ObjectMeta: metav1.ObjectMeta{ + Generation: 2, + }, + Replicas: 5, + Status: util.WorkloadStatus{ + Replicas: 5, + UpdatedReplicas: 2, + ObservedGeneration: 1, + StableRevision: "stable", + UpdateRevision: "update", + }, + }}, + wantEvent: controlpkg.WorkloadStillReconciling, + }, + { + name: "promoted", + controller: &fakePartitionController{workloadInfo: testWorkloadInfo(5, 5, "stable", "update")}, + wantEvent: controlpkg.WorkloadNormalState, + }, + { + name: "scaling", + controller: &fakePartitionController{workloadInfo: testWorkloadInfo(6, 2, "stable", "update")}, + wantEvent: controlpkg.WorkloadReplicasChanged, + }, + { + name: "rollback", + controller: &fakePartitionController{workloadInfo: testWorkloadInfo(5, 2, "stable", "stable")}, + wantEvent: controlpkg.WorkloadRollbackInBatch, + }, + { + name: "revision changed", + controller: &fakePartitionController{workloadInfo: testWorkloadInfo(5, 2, "stable", "other")}, + wantEvent: controlpkg.WorkloadPodTemplateChanged, + }, + { + name: "normal", + controller: &fakePartitionController{workloadInfo: testWorkloadInfo(5, 2, "stable", "update")}, + wantEvent: controlpkg.WorkloadNormalState, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + status := &v1beta1.BatchReleaseStatus{ + 
StableRevision: "stable", + UpdateRevision: "update", + ObservedWorkloadReplicas: 5, + } + rc := newTestControlPlane(tt.controller, status) + if tt.release != nil { + rc.release = tt.release() + } + + got, info, err := rc.SyncWorkloadInformation() + if (err != nil) != tt.wantErr { + t.Fatalf("SyncWorkloadInformation() error = %v, wantErr %v", err, tt.wantErr) + } + if got != tt.wantEvent { + t.Fatalf("event = %s, want %s", got, tt.wantEvent) + } + if tt.name == "deleted release is ignored" && info != nil { + t.Fatalf("info = %#v, want nil for deleted release", info) + } + }) + } +} + +func TestControlPlaneNoNeedUpdateReplicaHelpers(t *testing.T) { + t.Run("rollback without rollout id returns current updated replicas", func(t *testing.T) { + controller := &fakePartitionController{} + status := &v1beta1.BatchReleaseStatus{ + CanaryStatus: v1beta1.BatchReleaseCanaryStatus{UpdatedReplicas: 2}, + } + rc := newTestControlPlane(controller, status) + rc.release.Annotations = map[string]string{v1alpha1.RollbackInBatchAnnotation: "true"} + + got, err := rc.markNoNeedUpdatePodsIfNeeds() + if err != nil { + t.Fatalf("markNoNeedUpdatePodsIfNeeds() error = %v", err) + } + if got == nil || *got != 2 { + t.Fatalf("noNeedUpdateReplicas = %v, want 2", got) + } + }) + + t.Run("count refreshes status from matching pods", func(t *testing.T) { + noNeed := int32(0) + controller := &fakePartitionController{ + pods: []*corev1.Pod{ + testPod("matched", map[string]string{ + apps.ControllerRevisionHashLabelKey: "hash", + util.NoNeedUpdatePodLabel: "rollout-1", + }), + testPod("different-rollout", map[string]string{ + apps.ControllerRevisionHashLabelKey: "hash", + util.NoNeedUpdatePodLabel: "rollout-2", + }), + testPod("different-revision", map[string]string{ + apps.ControllerRevisionHashLabelKey: "old", + util.NoNeedUpdatePodLabel: "rollout-1", + }), + }, + } + status := &v1beta1.BatchReleaseStatus{ + UpdateRevision: "hash", + CanaryStatus: 
v1beta1.BatchReleaseCanaryStatus{NoNeedUpdateReplicas: &noNeed}, + } + rc := newTestControlPlane(controller, status) + rc.release.Spec.ReleasePlan.RolloutID = "rollout-1" + rc.release.Status.UpdateRevision = "hash" + rc.release.Status.CanaryStatus.NoNeedUpdateReplicas = &noNeed + + if err := rc.countAndUpdateNoNeedUpdateReplicas(); err != nil { + t.Fatalf("countAndUpdateNoNeedUpdateReplicas() error = %v", err) + } + if *status.CanaryStatus.NoNeedUpdateReplicas != 1 { + t.Fatalf("status noNeedUpdateReplicas = %d, want 1", *status.CanaryStatus.NoNeedUpdateReplicas) + } + if *rc.release.Status.CanaryStatus.NoNeedUpdateReplicas != 1 { + t.Fatalf("release noNeedUpdateReplicas = %d, want 1", *rc.release.Status.CanaryStatus.NoNeedUpdateReplicas) + } + }) +} + +func newTestControlPlane(controller *fakePartitionController, status *v1beta1.BatchReleaseStatus) *realBatchControlPlane { + return &realBatchControlPlane{ + Interface: controller, + Client: fake.NewClientBuilder().Build(), + EventRecorder: record.NewFakeRecorder(20), + patcher: &fakeBatchLabelPatcher{}, + ctx: context.Background(), + release: testBatchRelease(), + newStatus: status, + } +} + +func testBatchRelease() *v1beta1.BatchRelease { + return &v1beta1.BatchRelease{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "default", + Name: "release", + }, + Spec: v1beta1.BatchReleaseSpec{ + ReleasePlan: v1beta1.ReleasePlan{ + Batches: []v1beta1.ReleaseBatch{ + {CanaryReplicas: intstr.FromInt(1)}, + {CanaryReplicas: intstr.FromInt(3)}, + }, + }, + }, + } +} + +func testWorkloadInfo(replicas, updatedReplicas int32, stableRevision, updateRevision string) *util.WorkloadInfo { + return &util.WorkloadInfo{ + LogKey: "workload", + ObjectMeta: metav1.ObjectMeta{ + Generation: 1, + }, + Replicas: replicas, + Status: util.WorkloadStatus{ + Replicas: replicas, + UpdatedReplicas: updatedReplicas, + ObservedGeneration: 1, + StableRevision: stableRevision, + UpdateRevision: updateRevision, + }, + } +} + +func readyBatchContext() 
*batchcontext.BatchContext { + return &batchcontext.BatchContext{ + Replicas: 3, + CurrentBatch: 0, + UpdatedReplicas: 1, + UpdatedReadyReplicas: 1, + PlannedUpdatedReplicas: 1, + DesiredUpdatedReplicas: 1, + DesiredPartition: intstr.FromInt(2), + CurrentPartition: intstr.FromInt(3), + NoNeedUpdatedReplicas: nil, + FailureThreshold: nil, + Pods: nil, + UpdateRevision: "update", + RolloutID: "", + DesiredSurge: intstr.FromInt(0), + CurrentSurge: intstr.FromInt(0), + } +} + +func testPod(name string, labels map[string]string) *corev1.Pod { + return &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "default", + Name: name, + Labels: labels, + }, + } +} diff --git a/pkg/controller/batchrelease/control/partitionstyle/daemonset/control.go b/pkg/controller/batchrelease/control/partitionstyle/daemonset/control.go index 90f6c7fd..3c2dceba 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/daemonset/control.go +++ b/pkg/controller/batchrelease/control/partitionstyle/daemonset/control.go @@ -92,7 +92,7 @@ func (rc *realController) ListOwnedPods() ([]*corev1.Pod, error) { return rc.pods, err } -func (rc *realController) Initialize(release *v1beta1.BatchRelease) error { +func (rc *realController) Initialize(ctx context.Context, release *v1beta1.BatchRelease) error { if control.IsControlledByBatchRelease(release, rc.object) { return nil } @@ -102,13 +102,13 @@ func (rc *realController) Initialize(release *v1beta1.BatchRelease) error { body := fmt.Sprintf(`{"metadata":{"annotations":{"%s":"%s"}},"spec":{"updateStrategy":{"rollingUpdate":{"paused":%v,"partition":%d}}}}`, util.BatchReleaseControlAnnotation, owner, false, rc.Replicas) - return rc.client.Patch(context.TODO(), daemon, client.RawPatch(types.MergePatchType, []byte(body))) + return rc.client.Patch(ctx, daemon, client.RawPatch(types.MergePatchType, []byte(body))) } -func (rc *realController) UpgradeBatch(ctx *batchcontext.BatchContext) error { +func (rc *realController) UpgradeBatch(ctx 
context.Context, batchContext *batchcontext.BatchContext) error { - desired := ctx.DesiredPartition.IntVal - current := ctx.CurrentPartition.IntVal + desired := batchContext.DesiredPartition.IntVal + current := batchContext.CurrentPartition.IntVal // current less than desired, which means current revision replicas will be less than desired, // in other word, update revision replicas will be more than desired, no need to update again. if current <= desired { @@ -118,10 +118,10 @@ func (rc *realController) UpgradeBatch(ctx *batchcontext.BatchContext) error { body := fmt.Sprintf(`{"spec":{"updateStrategy":{"rollingUpdate":{"partition":%d}}}}`, desired) daemon := util.GetEmptyObjectWithKey(rc.object) - return rc.client.Patch(context.TODO(), daemon, client.RawPatch(types.MergePatchType, []byte(body))) + return rc.client.Patch(ctx, daemon, client.RawPatch(types.MergePatchType, []byte(body))) } -func (rc *realController) Finalize(release *v1beta1.BatchRelease) error { +func (rc *realController) Finalize(ctx context.Context, release *v1beta1.BatchRelease) error { if rc.object == nil { return nil } @@ -137,7 +137,7 @@ func (rc *realController) Finalize(release *v1beta1.BatchRelease) error { } body := fmt.Sprintf(`{"metadata":{"annotations":{"%s":null}}%s}`, util.BatchReleaseControlAnnotation, specBody) daemon := util.GetEmptyObjectWithKey(rc.object) - return rc.client.Patch(context.TODO(), daemon, client.RawPatch(types.MergePatchType, []byte(body))) + return rc.client.Patch(ctx, daemon, client.RawPatch(types.MergePatchType, []byte(body))) } func (rc *realController) CalculateBatchContext(release *v1beta1.BatchRelease) (*batchcontext.BatchContext, error) { diff --git a/pkg/controller/batchrelease/control/partitionstyle/daemonset/control_test.go b/pkg/controller/batchrelease/control/partitionstyle/daemonset/control_test.go index 1b42041a..fa98bebc 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/daemonset/control_test.go +++ 
b/pkg/controller/batchrelease/control/partitionstyle/daemonset/control_test.go @@ -341,7 +341,7 @@ func TestRealController(t *testing.T) { controller, err := c.BuildController() Expect(err).NotTo(HaveOccurred()) - err = controller.Initialize(release) + err = controller.Initialize(context.Background(), release) Expect(err).NotTo(HaveOccurred()) fetch := &kruiseappsv1alpha1.DaemonSet{} @@ -356,7 +356,7 @@ func TestRealController(t *testing.T) { for { batchContext, err := controller.CalculateBatchContext(release) Expect(err).NotTo(HaveOccurred()) - err = controller.UpgradeBatch(batchContext) + err = controller.UpgradeBatch(context.Background(), batchContext) fetch = &kruiseappsv1alpha1.DaemonSet{} // mock Expect(cli.Get(context.TODO(), daemonKey, fetch)).NotTo(HaveOccurred()) @@ -371,7 +371,7 @@ func TestRealController(t *testing.T) { fmt.Println(*fetch.Spec.UpdateStrategy.RollingUpdate.Partition) Expect(*fetch.Spec.UpdateStrategy.RollingUpdate.Partition).Should(Equal(int32(9))) - err = controller.Finalize(release) + err = controller.Finalize(context.Background(), release) Expect(err).NotTo(HaveOccurred()) fetch = &kruiseappsv1alpha1.DaemonSet{} Expect(cli.Get(context.TODO(), daemonKey, fetch)).NotTo(HaveOccurred()) diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/control.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/control.go index 9f3ba508..669c56e2 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/control.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/control.go @@ -49,6 +49,14 @@ type realController struct { } func NewController(cli client.Client, key types.NamespacedName, _ schema.GroupVersionKind) partitionstyle.Interface { + return newRealController(cli, key) +} + +func NewMinReadyController(cli client.Client, key types.NamespacedName, _ schema.GroupVersionKind) partitionstyle.Interface { + return &MinReadyControl{realController: newRealController(cli, key)} +} + 
+func newRealController(cli client.Client, key types.NamespacedName) *realController { return &realController{ key: key, client: cli, @@ -81,7 +89,7 @@ func (rc *realController) ListOwnedPods() ([]*corev1.Pod, error) { return rc.pods, err } -func (rc *realController) Initialize(release *v1beta1.BatchRelease) error { +func (rc *realController) Initialize(ctx context.Context, release *v1beta1.BatchRelease) error { if deploymentutil.IsUnderRolloutControl(rc.object) { return nil // No need initialize again. } @@ -109,11 +117,11 @@ func (rc *realController) Initialize(release *v1beta1.BatchRelease) error { // Disable the native deployment controller patchData.UpdatePaused(true) - patchData.UpdateStrategy(apps.DeploymentStrategy{Type: apps.RecreateDeploymentStrategyType}) - return rc.client.Patch(context.TODO(), d, patchData) + patchData.UpdateRecreateStrategy() + return rc.client.Patch(ctx, d, patchData) } -func (rc *realController) UpgradeBatch(ctx *batchcontext.BatchContext) error { +func (rc *realController) UpgradeBatch(ctx context.Context, batchContext *batchcontext.BatchContext) error { if !deploymentutil.IsUnderRolloutControl(rc.object) { klog.Warningf("Cannot upgrade batch, because "+ "deployment %v has ridden out of our control", klog.KObj(rc.object)) @@ -121,18 +129,18 @@ func (rc *realController) UpgradeBatch(ctx *batchcontext.BatchContext) error { } strategy := util.GetDeploymentStrategy(rc.object) - if control.IsCurrentMoreThanOrEqualToDesired(strategy.Partition, ctx.DesiredPartition) { + if control.IsCurrentMoreThanOrEqualToDesired(strategy.Partition, batchContext.DesiredPartition) { return nil // Satisfied, no need patch again. 
} d := rc.object.DeepCopy() - strategy.Partition = ctx.DesiredPartition + strategy.Partition = batchContext.DesiredPartition patchData := patch.NewDeploymentPatch() patchData.InsertAnnotation(v1alpha1.DeploymentStrategyAnnotation, util.DumpJSON(&strategy)) - return rc.client.Patch(context.TODO(), d, patchData) + return rc.client.Patch(ctx, d, patchData) } -func (rc *realController) Finalize(release *v1beta1.BatchRelease) error { +func (rc *realController) Finalize(ctx context.Context, release *v1beta1.BatchRelease) error { if rc.object == nil { return nil // No need to finalize again. } @@ -161,7 +169,7 @@ func (rc *realController) Finalize(release *v1beta1.BatchRelease) error { } d := rc.object.DeepCopy() patchData.DeleteAnnotation(util.BatchReleaseControlAnnotation) - return rc.client.Patch(context.TODO(), d, patchData) + return rc.client.Patch(ctx, d, patchData) } func (rc *realController) CalculateBatchContext(release *v1beta1.BatchRelease) (*batchcontext.BatchContext, error) { diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/control_test.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/control_test.go index 5d3ab77e..59234409 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/deployment/control_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/control_test.go @@ -151,6 +151,7 @@ var ( func init() { apps.AddToScheme(scheme) + corev1.AddToScheme(scheme) rolloutapi.AddToScheme(scheme) kruiseappsv1alpha1.AddToScheme(scheme) } @@ -320,12 +321,13 @@ func TestRealController(t *testing.T) { controller, err := c.BuildController() Expect(err).NotTo(HaveOccurred()) - err = controller.Initialize(release) + err = controller.Initialize(context.Background(), release) Expect(err).NotTo(HaveOccurred()) fetch := &apps.Deployment{} Expect(cli.Get(context.TODO(), deploymentKey, fetch)).NotTo(HaveOccurred()) Expect(fetch.Spec.Paused).Should(BeTrue()) 
Expect(fetch.Spec.Strategy.Type).Should(Equal(apps.RecreateDeploymentStrategyType)) + Expect(fetch.Spec.Strategy.RollingUpdate).Should(BeNil()) Expect(fetch.Annotations[util.BatchReleaseControlAnnotation]).Should(Equal(getControlInfo(release))) strategy := util.GetDeploymentStrategy(fetch) Expect(strategy.Paused).Should(BeFalse()) @@ -334,7 +336,7 @@ func TestRealController(t *testing.T) { for { batchContext, err := controller.CalculateBatchContext(release) Expect(err).NotTo(HaveOccurred()) - err = controller.UpgradeBatch(batchContext) + err = controller.UpgradeBatch(context.Background(), batchContext) fetch := &apps.Deployment{} // mock Expect(cli.Get(context.TODO(), deploymentKey, fetch)).NotTo(HaveOccurred()) @@ -349,7 +351,7 @@ func TestRealController(t *testing.T) { Expect(strategy.Partition.StrVal).Should(Equal("50%")) release.Spec.ReleasePlan.BatchPartition = nil - err = controller.Finalize(release) + err = controller.Finalize(context.Background(), release) Expect(err).NotTo(HaveOccurred()) fetch = &apps.Deployment{} Expect(cli.Get(context.TODO(), deploymentKey, fetch)).NotTo(HaveOccurred()) @@ -497,7 +499,7 @@ func TestFinalize(t *testing.T) { t.Fatalf("BuildController failed: %s", err.Error()) } cs.featureGateFunc() - err = c.Finalize(br) + err = c.Finalize(context.Background(), br) if err != nil { t.Fatalf("BuildController failed: %s", err.Error()) } diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_batch_context.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_batch_context.go new file mode 100644 index 00000000..2b019614 --- /dev/null +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_batch_context.go @@ -0,0 +1,77 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package deployment + +import ( + "fmt" + "time" + + apps "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/intstr" + + "github.com/openkruise/rollouts/pkg/util" +) + +func (mc *MinReadyControl) minReadyUpdatedReadyReplicas(updateRevision string, pods []*corev1.Pod) (int32, error) { + original, err := parseOriginalDeploymentStrategy(mc.object.Annotations) + if err != nil { + return 0, err + } + return countUpdatedAvailablePods(pods, updateRevision, originalMinReadySeconds(original), time.Now()), nil +} + +func countUpdatedAvailablePods(pods []*corev1.Pod, updateRevision string, minReadySeconds int32, now time.Time) int32 { + return int32(util.WrappedPodCount(pods, func(pod *corev1.Pod) bool { + if !util.IsPodActive(pod) { + return false + } + if !util.IsConsistentWithRevision(pod.Labels, updateRevision) { + return false + } + ready := util.GetPodReadyCondition(pod.Status) + if ready == nil || ready.Status != corev1.ConditionTrue { + return false + } + return !ready.LastTransitionTime.Add(time.Duration(minReadySeconds) * time.Second).After(now) + })) +} + +func originalMinReadySeconds(original *originalDeploymentStrategy) int32 { + if original.minReadySeconds == nil { + return 0 + } + return *original.minReadySeconds +} + +func minReadyDesiredUpdatedReplicas(desired intstr.IntOrString, deployment *apps.Deployment) (int32, error) { + if deployment.Spec.Replicas == nil { + return 0, fmt.Errorf("deployment replicas is nil") + } + replicas := int(*deployment.Spec.Replicas) + target, err := intstr.GetScaledValueFromIntOrPercent(&desired, 
replicas, true) + if err != nil { + return 0, err + } + if target < 0 { + return 0, nil + } + if target > replicas { + return int32(replicas), nil + } + return int32(target), nil +} diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_constants.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_constants.go new file mode 100644 index 00000000..f9f7d9ee --- /dev/null +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_constants.go @@ -0,0 +1,105 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package deployment + +import ( + "fmt" + "strconv" + "strings" + + "k8s.io/apimachinery/pkg/util/intstr" + + "github.com/openkruise/rollouts/api/v1beta1" + "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle" +) + +const ( + // Aliases kept for readability inside this package; the canonical + // definitions live in api/v1beta1 so that packages which cannot import + // this one (e.g. partitionstyle) can still recognize MinReady state. 
+ AnnotationOriginalMinReadySeconds = v1beta1.MinReadyOriginalMinReadySecondsAnnotation + AnnotationOriginalProgressDeadlineSeconds = v1beta1.MinReadyOriginalProgressDeadlineSecondsAnnotation + AnnotationOriginalMaxUnavailable = v1beta1.MinReadyOriginalMaxUnavailableAnnotation + + DefaultProgressDeadlineSeconds int32 = v1beta1.MinReadyDefaultProgressDeadlineSeconds + DefaultMaxUnavailable = v1beta1.MinReadyDefaultMaxUnavailable + + InflatedMinReadySeconds int32 = v1beta1.MaxReadySeconds + InflatedProgressDeadlineSeconds int32 = v1beta1.MaxProgressSeconds +) + +var AllOriginalAnnotations = v1beta1.MinReadyOriginalAnnotations + +func serializeOriginalInt32(value *int32, defaultValue int32) string { + if value == nil { + return strconv.FormatInt(int64(defaultValue), 10) + } + return strconv.FormatInt(int64(*value), 10) +} + +func serializeOriginalIntOrString(value *intstr.IntOrString) string { + if value == nil { + return DefaultMaxUnavailable + } + if value.Type == intstr.String { + return value.StrVal + } + return strconv.FormatInt(int64(value.IntVal), 10) +} + +func parseOriginalInt32(annotations map[string]string, key string) (*int32, error) { + raw, ok := annotations[key] + if !ok { + return nil, fmt.Errorf("annotation %s missing: %w", key, partitionstyle.ErrMinReadyAnnotationInvalid) + } + if raw == "" { + return nil, fmt.Errorf("annotation %s present but empty: %w", key, partitionstyle.ErrMinReadyAnnotationInvalid) + } + n, err := strconv.ParseInt(raw, 10, 32) + if err != nil { + return nil, fmt.Errorf("annotation %s malformed int32: %v: %w", key, err, partitionstyle.ErrMinReadyAnnotationInvalid) + } + v := int32(n) + return &v, nil +} + +func parseOriginalIntOrString(annotations map[string]string, key string) (*intstr.IntOrString, error) { + raw, ok := annotations[key] + if !ok { + return nil, fmt.Errorf("annotation %s missing: %w", key, partitionstyle.ErrMinReadyAnnotationInvalid) + } + if raw == "" { + return nil, fmt.Errorf("annotation %s present but empty: 
%w", key, partitionstyle.ErrMinReadyAnnotationInvalid) + } + if strings.HasSuffix(raw, "%") { + if _, err := strconv.Atoi(strings.TrimSuffix(raw, "%")); err != nil { + return nil, fmt.Errorf("annotation %s malformed percent: %v: %w", key, err, partitionstyle.ErrMinReadyAnnotationInvalid) + } + v := intstr.FromString(raw) + return &v, nil + } + n, err := strconv.Atoi(raw) + if err != nil { + return nil, fmt.Errorf("annotation %s malformed int: %v: %w", key, err, partitionstyle.ErrMinReadyAnnotationInvalid) + } + v := intstr.FromInt(n) + return &v, nil +} + +func hasAnyOriginalAnnotation(annotations map[string]string) bool { + return v1beta1.HasMinReadyOriginalAnnotations(annotations) +} diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go new file mode 100644 index 00000000..ba06ed64 --- /dev/null +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control.go @@ -0,0 +1,483 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package deployment + +import ( + "context" + "fmt" + + apps "k8s.io/api/apps/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/tools/record" + "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/openkruise/rollouts/api/v1alpha1" + "github.com/openkruise/rollouts/api/v1beta1" + batchcontext "github.com/openkruise/rollouts/pkg/controller/batchrelease/context" + "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle" + "github.com/openkruise/rollouts/pkg/feature" + "github.com/openkruise/rollouts/pkg/util" + utilfeature "github.com/openkruise/rollouts/pkg/util/feature" +) + +type MinReadyControl struct { + *realController + statusWriter *partitionstyle.MinReadyStatusWriter +} + +func (mc *MinReadyControl) IsMinReadyControl() bool { + return true +} + +func (mc *MinReadyControl) BindMinReadyStatus(release *v1beta1.BatchRelease, status *v1beta1.BatchReleaseStatus, recorder record.EventRecorder) { + mc.statusWriter = partitionstyle.NewMinReadyStatusWriter(release, status, recorder) +} + +func (mc *MinReadyControl) RecordOperationFailed(reason string, err error) { + if mc.statusWriter != nil { + mc.statusWriter.RecordDegraded(reason, err) + } +} + +func (mc *MinReadyControl) RecordZeroReplicaBatching() { + if mc.statusWriter != nil { + mc.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatching", "MinReadySeconds strategy has no replicas to upgrade") + } +} + +func (mc *MinReadyControl) RecordBatchAdvanced() { + if mc.statusWriter != nil { + mc.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatching", "MinReadySeconds strategy advanced the current batch") + } +} + +func (mc *MinReadyControl) RecordZeroReplicaBatchReady() { + if mc.statusWriter != nil { + mc.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatchReady", "MinReadySeconds strategy batch is ready") + } 
+} + +func (mc *MinReadyControl) RecordBatchReady() { + if mc.statusWriter != nil { + mc.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatchReady", "MinReadySeconds strategy batch is ready") + } +} + +func (mc *MinReadyControl) RecordInitialized() { + if mc.statusWriter != nil { + mc.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyInitialized, "MinReadyInitialized", "MinReadySeconds strategy initialized") + } +} + +func (mc *MinReadyControl) RecordFinalized() { + if mc.statusWriter != nil { + mc.statusWriter.RecordNormal(v1beta1.RolloutConditionMinReadyFinalized, "MinReadyFinalized", "MinReadySeconds strategy finalized") + } +} + +func (mc *MinReadyControl) ObserveBatchWait() { + if mc.statusWriter == nil { + return + } + status := mc.statusWriter.BatchReleaseStatus() + if status == nil { + return + } + condition := util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyBatching) + partitionstyle.ObserveMinReadyBatchWait(mc.statusWriter.BatchRelease(), condition) +} + +func (mc *MinReadyControl) BuildController() (partitionstyle.Interface, error) { + if mc.realController == nil { + return nil, fmt.Errorf("MinReadyControl.BuildController: realController is nil") + } + built, err := mc.realController.BuildController() + if err != nil { + return nil, err + } + rc, ok := built.(*realController) + if !ok { + return nil, fmt.Errorf("MinReadyControl.BuildController: expected *realController, got %T", built) + } + return &MinReadyControl{realController: rc, statusWriter: mc.statusWriter}, nil +} + +func (mc *MinReadyControl) Initialize(ctx context.Context, release *v1beta1.BatchRelease) error { + if release == nil { + return fmt.Errorf("MinReadyControl.Initialize: release is nil") + } + if err := mc.ensureInitializeAllowed(); err != nil { + return fmt.Errorf("MinReadyControl.Initialize: %w", err) + } + original := mc.object + modified := mc.object.DeepCopy() + if err := prepareOriginalAnnotations(original, modified); 
err != nil { + return fmt.Errorf("MinReadyControl.Initialize: %w", err) + } + modified.Annotations[util.BatchReleaseControlAnnotation] = util.DumpJSON(metav1.NewControllerRef( + release, release.GetObjectKind().GroupVersionKind())) + inflateDeploymentStrategy(modified) + patch := client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{}) + if err := mc.client.Patch(ctx, modified, patch); err != nil { + return fmt.Errorf("MinReadyControl.Initialize: %w", err) + } + return nil +} + +func (mc *MinReadyControl) UpgradeBatch(ctx context.Context, batchContext *batchcontext.BatchContext) error { + if err := mc.ensureInflatedDeploymentStrategy(ctx); err != nil { + return fmt.Errorf("MinReadyControl.UpgradeBatch[%d]: %w", batchContext.CurrentBatch, err) + } + return mc.reconcileMaxUnavailable(ctx, batchContext) +} + +func (mc *MinReadyControl) ReconcileMaxUnavailableDrift(ctx context.Context, batchContext *batchcontext.BatchContext) error { + if err := mc.ensureInflatedDeploymentStrategy(ctx); err != nil { + return fmt.Errorf("MinReadyControl.ReconcileMaxUnavailableDrift[%d]: %w", batchContext.CurrentBatch, err) + } + return mc.reconcileMaxUnavailable(ctx, batchContext) +} + +func (mc *MinReadyControl) reconcileMaxUnavailable(ctx context.Context, batchContext *batchcontext.BatchContext) error { + if err := mc.refreshDeployment(ctx); err != nil { + return fmt.Errorf("MinReadyControl.reconcileMaxUnavailable[%d]: %w", batchContext.CurrentBatch, err) + } + current, err := intstr.GetScaledValueFromIntOrPercent( + mc.object.Spec.Strategy.RollingUpdate.MaxUnavailable, int(batchContext.Replicas), true) + if err != nil { + return fmt.Errorf("MinReadyControl.reconcileMaxUnavailable[%d]: %w", batchContext.CurrentBatch, err) + } + target := batchContext.DesiredUpdatedReplicas + + // At or above the batch target there is nothing to advance. 
When current + // exceeds the target (HPA scale-down or external tampering) converge it + // back down so the native controller never holds a wider budget than this + // batch needs. + if int32(current) >= target { + if int32(current) == target { + return nil + } + klog.V(0).InfoS("MinReady maxUnavailable exceeds target, reducing", + "batch", batchContext.CurrentBatch, "deployment", klog.KObj(mc.object), + "maxUnavailable", current, "target", target) + return mc.patchMaxUnavailable(ctx, int(target)) + } + + // Sliding window: keep no more than the user's original maxUnavailable + // budget worth of updated-but-not-ready pods in flight. As each updated pod + // becomes ready, top up the window immediately instead of waiting for the + // whole current window to become ready. + step, err := mc.maxUnavailableStep(batchContext.Replicas) + if err != nil { + return fmt.Errorf("MinReadyControl.reconcileMaxUnavailable[%d]: %w", batchContext.CurrentBatch, err) + } + if step <= 0 { + // maxUnavailable=0 means the user relies on maxSurge for concurrency + // control; there is no budget to slide, so drive the batch directly. + return mc.patchMaxUnavailable(ctx, int(target)) + } + next := int(batchContext.UpdatedReadyReplicas) + step + if next <= current { + return nil + } + if int32(next) > target { + next = int(target) + } + return mc.patchMaxUnavailable(ctx, next) +} + +// maxUnavailableStep returns the user's original maxUnavailable scaled to the +// replica count; the sliding window uses it as the advancement stride. 
+func (mc *MinReadyControl) maxUnavailableStep(replicas int32) (int, error) { + original, err := parseOriginalDeploymentStrategy(mc.object.Annotations) + if err != nil { + return 0, err + } + step := intstr.FromString(DefaultMaxUnavailable) + if original.maxUnavailable != nil { + step = *original.maxUnavailable + } + return intstr.GetScaledValueFromIntOrPercent(&step, int(replicas), true) +} + +// patchMaxUnavailable writes the given integer maxUnavailable back to the +// Deployment with an optimistic-lock patch and refreshes the cached object. +func (mc *MinReadyControl) patchMaxUnavailable(ctx context.Context, value int) error { + original := mc.object + modified := mc.object.DeepCopy() + maxUnavailable := intstr.FromInt(value) + modified.Spec.Strategy.RollingUpdate.MaxUnavailable = &maxUnavailable + patch := client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{}) + if err := mc.client.Patch(ctx, modified, patch); err != nil { + return fmt.Errorf("MinReadyControl.reconcileMaxUnavailable: %w", err) + } + mc.object = modified + return nil +} + +func (mc *MinReadyControl) refreshDeployment(ctx context.Context) error { + if mc.realController == nil { + return fmt.Errorf("deployment is not loaded") + } + object := &apps.Deployment{} + if err := mc.client.Get(ctx, mc.key, object); err != nil { + return err + } + mc.object = object + mc.WorkloadInfo = mc.getWorkloadInfo(object) + return nil +} + +func (mc *MinReadyControl) Finalize(ctx context.Context, _ *v1beta1.BatchRelease) error { + if mc.object == nil { + return nil + } + if !hasAnyOriginalAnnotation(mc.object.Annotations) { + if hasInflatedDeploymentFields(mc.object) { + return fmt.Errorf("MinReadyControl.Finalize: annotation state missing while deployment fields are still inflated: %w", + partitionstyle.ErrMinReadyAnnotationInvalid) + } + return nil + } + original := mc.object + restored, err := parseOriginalDeploymentStrategy(original.Annotations) + if err != nil { + return 
fmt.Errorf("MinReadyControl.Finalize: %w", err) + } + modified := mc.object.DeepCopy() + applyOriginalDeploymentStrategy(modified, restored) + for _, key := range AllOriginalAnnotations { + delete(modified.Annotations, key) + } + delete(modified.Annotations, util.BatchReleaseControlAnnotation) + delete(modified.Labels, v1alpha1.DeploymentStableRevisionLabel) + patch := client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{}) + if err := mc.client.Patch(ctx, modified, patch); err != nil { + return fmt.Errorf("MinReadyControl.Finalize: %w", err) + } + return nil +} + +func (mc *MinReadyControl) CalculateBatchContext(release *v1beta1.BatchRelease) (*batchcontext.BatchContext, error) { + rolloutID := release.Spec.ReleasePlan.RolloutID + pods, err := mc.ListOwnedPods() + if err != nil { + return nil, fmt.Errorf("MinReadyControl.CalculateBatchContext: %w", err) + } + + currentBatch := release.Status.CanaryStatus.CurrentBatch + desiredPartition := release.Spec.ReleasePlan.Batches[currentBatch].CanaryReplicas + desiredUpdatedReplicas, err := minReadyDesiredUpdatedReplicas(desiredPartition, mc.object) + if err != nil { + return nil, fmt.Errorf("MinReadyControl.CalculateBatchContext: %w", err) + } + updatedReadyReplicas, err := mc.minReadyUpdatedReadyReplicas(release.Status.UpdateRevision, pods) + if err != nil { + return nil, fmt.Errorf("MinReadyControl.CalculateBatchContext: %w", err) + } + return &batchcontext.BatchContext{ + RolloutID: rolloutID, + CurrentBatch: currentBatch, + UpdateRevision: release.Status.UpdateRevision, + Replicas: mc.Replicas, + UpdatedReplicas: mc.object.Status.UpdatedReplicas, + UpdatedReadyReplicas: updatedReadyReplicas, + PlannedUpdatedReplicas: desiredUpdatedReplicas, + DesiredUpdatedReplicas: desiredUpdatedReplicas, + DesiredPartition: desiredPartition, + FailureThreshold: release.Spec.ReleasePlan.FailureThreshold, + Pods: pods, + }, nil +} + +func (mc *MinReadyControl) ensureInitializeAllowed() error { + if 
mc.realController == nil || mc.object == nil { + return fmt.Errorf("deployment is not loaded") + } + if !utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) { + return fmt.Errorf("%s %w", feature.MinReadySecondsStrategy, partitionstyle.ErrMinReadyFeatureGateDisabled) + } + if err := validateDeploymentStrategyType(mc.object); err != nil { + return err + } + return nil +} + +func prepareOriginalAnnotations(deployment, writeTarget *apps.Deployment) error { + if !hasAnyOriginalAnnotation(deployment.Annotations) { + writeOriginalAnnotations(deployment, writeTarget) + return nil + } + if err := ensureOriginalAnnotations(deployment); err != nil { + return err + } + return validateInflatedDeploymentStrategy(deployment) +} + +func ensureOriginalAnnotations(deployment *apps.Deployment) error { + _, err := parseOriginalDeploymentStrategy(deployment.Annotations) + return err +} + +func writeOriginalAnnotations(original, modified *apps.Deployment) { + if modified.Annotations == nil { + modified.Annotations = map[string]string{} + } + writeOriginalAvailabilityAnnotations(original, modified) + modified.Annotations[AnnotationOriginalMaxUnavailable] = serializeOriginalIntOrString(originalMaxUnavailable(original)) +} + +func writeOriginalAvailabilityAnnotations(original, modified *apps.Deployment) { + if modified.Annotations == nil { + modified.Annotations = map[string]string{} + } + modified.Annotations[AnnotationOriginalMinReadySeconds] = serializeOriginalInt32(&original.Spec.MinReadySeconds, 0) + modified.Annotations[AnnotationOriginalProgressDeadlineSeconds] = serializeOriginalInt32(original.Spec.ProgressDeadlineSeconds, DefaultProgressDeadlineSeconds) +} + +func originalMaxUnavailable(deployment *apps.Deployment) *intstr.IntOrString { + if deployment.Spec.Strategy.RollingUpdate == nil { + return nil + } + return deployment.Spec.Strategy.RollingUpdate.MaxUnavailable +} + +func inflateDeploymentStrategy(deployment *apps.Deployment) { + progressDeadlineSeconds 
:= InflatedProgressDeadlineSeconds + maxUnavailable := intstr.FromInt(0) + // MinReady keeps the native controller running; a paused Deployment would + // freeze silently, so pausing is always reverted together with inflation. + deployment.Spec.Paused = false + deployment.Spec.MinReadySeconds = InflatedMinReadySeconds + deployment.Spec.ProgressDeadlineSeconds = &progressDeadlineSeconds + if deployment.Spec.Strategy.RollingUpdate == nil { + deployment.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{} + } + deployment.Spec.Strategy.RollingUpdate.MaxUnavailable = &maxUnavailable +} + +func (mc *MinReadyControl) ensureInflatedDeploymentStrategy(ctx context.Context) error { + if err := validateDeploymentStrategyType(mc.object); err != nil { + return err + } + if validateInflatedDeploymentStrategy(mc.object) == nil { + return nil + } + original := mc.object + modified := mc.object.DeepCopy() + inflateDeploymentStrategy(modified) + patch := client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{}) + if err := mc.client.Patch(ctx, modified, patch); err != nil { + return err + } + mc.object = modified + return nil +} + +func validateInflatedDeploymentStrategy(deployment *apps.Deployment) error { + if err := validateDeploymentStrategyType(deployment); err != nil { + return err + } + if deployment.Spec.Paused { + // A paused Deployment silently freezes the native controller; surface + // it through the degraded channel instead of waiting without signal. 
+ return fmt.Errorf("%w: deployment is paused", partitionstyle.ErrMinReadyDriftDetected) + } + if deployment.Spec.MinReadySeconds != InflatedMinReadySeconds { + return fmt.Errorf("%w: minReadySeconds=%d want %d", + partitionstyle.ErrMinReadyDriftDetected, deployment.Spec.MinReadySeconds, InflatedMinReadySeconds) + } + if deployment.Spec.ProgressDeadlineSeconds == nil || *deployment.Spec.ProgressDeadlineSeconds != InflatedProgressDeadlineSeconds { + return fmt.Errorf("%w: progressDeadlineSeconds=%v want %d", + partitionstyle.ErrMinReadyDriftDetected, deployment.Spec.ProgressDeadlineSeconds, InflatedProgressDeadlineSeconds) + } + if deployment.Spec.Strategy.RollingUpdate == nil { + return fmt.Errorf("%w: rollingUpdate is nil", partitionstyle.ErrMinReadyDriftDetected) + } + return nil +} + +func validateDeploymentStrategyType(deployment *apps.Deployment) error { + if deployment.Spec.Strategy.Type != apps.RollingUpdateDeploymentStrategyType { + return fmt.Errorf("%w: deployment strategy type %s is not RollingUpdate", + partitionstyle.ErrMinReadyDriftDetected, deployment.Spec.Strategy.Type) + } + return nil +} + +func hasInflatedDeploymentFields(deployment *apps.Deployment) bool { + if deployment.Spec.MinReadySeconds == InflatedMinReadySeconds { + return true + } + return deployment.Spec.ProgressDeadlineSeconds != nil && + *deployment.Spec.ProgressDeadlineSeconds == InflatedProgressDeadlineSeconds +} + +type originalDeploymentStrategy struct { + minReadySeconds *int32 + progressDeadlineSeconds *int32 + maxUnavailable *intstr.IntOrString +} + +func parseOriginalDeploymentStrategy(annotations map[string]string) (*originalDeploymentStrategy, error) { + minReadySeconds, err := parseOriginalInt32(annotations, AnnotationOriginalMinReadySeconds) + if err != nil { + return nil, err + } + progressDeadlineSeconds, err := parseOriginalInt32(annotations, AnnotationOriginalProgressDeadlineSeconds) + if err != nil { + return nil, err + } + maxUnavailable, err := 
parseOriginalIntOrString(annotations, AnnotationOriginalMaxUnavailable) + if err != nil { + return nil, err + } + return &originalDeploymentStrategy{ + minReadySeconds: minReadySeconds, + progressDeadlineSeconds: progressDeadlineSeconds, + maxUnavailable: maxUnavailable, + }, nil +} + +func applyOriginalDeploymentStrategy(deployment *apps.Deployment, original *originalDeploymentStrategy) { + deployment.Spec.MinReadySeconds = 0 + if original.minReadySeconds != nil { + deployment.Spec.MinReadySeconds = *original.minReadySeconds + } + deployment.Spec.ProgressDeadlineSeconds = original.progressDeadlineSeconds + if original.maxUnavailable == nil && (deployment.Spec.Strategy.RollingUpdate == nil || + deployment.Spec.Strategy.RollingUpdate.MaxSurge == nil) { + deployment.Spec.Strategy.RollingUpdate = nil + return + } + if deployment.Spec.Strategy.RollingUpdate == nil { + deployment.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{} + } + deployment.Spec.Strategy.RollingUpdate.MaxUnavailable = original.maxUnavailable +} + +// EventDegradedDriftDetected is the warning event reason recorded when +// external drift of the inflated fields is detected. It equals the sentinel +// error text so events, metrics and errors.Is classification stay in sync. 
+var EventDegradedDriftDetected = partitionstyle.ErrMinReadyDriftDetected.Error() + +var _ partitionstyle.Interface = (*MinReadyControl)(nil) +var _ partitionstyle.MinReadyStatusBinder = (*MinReadyControl)(nil) +var _ partitionstyle.MinReadyLifecycle = (*MinReadyControl)(nil) +var _ partitionstyle.MinReadyDriftReconciler = (*MinReadyControl)(nil) diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go new file mode 100644 index 00000000..66761a0a --- /dev/null +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_control_test.go @@ -0,0 +1,619 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package deployment + +import ( + "context" + "strings" + "testing" + + apps "k8s.io/api/apps/v1" + policyv1 "k8s.io/api/policy/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/utils/pointer" + + batchcontext "github.com/openkruise/rollouts/pkg/controller/batchrelease/context" + "github.com/openkruise/rollouts/pkg/feature" + "github.com/openkruise/rollouts/pkg/util" + utilfeature "github.com/openkruise/rollouts/pkg/util/feature" +) + +func init() { + _ = policyv1.AddToScheme(scheme) +} + +func TestMinReadyInitializeWritesOriginalAnnotationsAndInflatesFields(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newMinReadyDeployment() + control := newBuiltMinReadyControl(t, deployment) + + if err := control.Initialize(context.Background(), releaseDemo.DeepCopy()); err != nil { + t.Fatalf("Initialize failed: %v", err) + } + + got := fetchMinReadyDeployment(t, control) + assertMinReadyInflated(t, got) + annotations := got.GetAnnotations() + assertAnnotation(t, annotations, AnnotationOriginalMinReadySeconds, "7") + assertAnnotation(t, annotations, AnnotationOriginalProgressDeadlineSeconds, "60") + assertAnnotation(t, annotations, AnnotationOriginalMaxUnavailable, "25%") + assertAnnotation(t, annotations, util.BatchReleaseControlAnnotation, getControlInfo(releaseDemo)) +} + +func TestMinReadyInitializeIsIdempotentAndDoesNotOverwriteAnnotations(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newMinReadyDeployment() + deployment.Annotations = map[string]string{ + AnnotationOriginalMinReadySeconds: "5", + AnnotationOriginalProgressDeadlineSeconds: "30", + AnnotationOriginalMaxUnavailable: "10%", + } + inflateDeploymentStrategy(deployment) + control := newBuiltMinReadyControl(t, deployment) + + if err := control.Initialize(context.Background(), 
releaseDemo.DeepCopy()); err != nil { + t.Fatalf("Initialize failed: %v", err) + } + + got := fetchMinReadyDeployment(t, control) + assertAnnotation(t, got.Annotations, AnnotationOriginalMinReadySeconds, "5") + assertAnnotation(t, got.Annotations, AnnotationOriginalProgressDeadlineSeconds, "30") + assertAnnotation(t, got.Annotations, AnnotationOriginalMaxUnavailable, "10%") + assertMinReadyInflated(t, got) +} + +func TestMinReadyInitializeRejectsGitOpsDrift(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newMinReadyDeployment() + deployment.Annotations = map[string]string{ + AnnotationOriginalMinReadySeconds: "5", + AnnotationOriginalProgressDeadlineSeconds: "30", + AnnotationOriginalMaxUnavailable: "10%", + } + control := newBuiltMinReadyControl(t, deployment) + + err := control.Initialize(context.Background(), releaseDemo.DeepCopy()) + if err == nil || !strings.Contains(err.Error(), EventDegradedDriftDetected) { + t.Fatalf("Initialize error = %v, want drift detected", err) + } +} + +func TestMinReadyInitializeRejectsPartialOriginalAnnotations(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newMinReadyDeployment() + deployment.Annotations = map[string]string{ + AnnotationOriginalMinReadySeconds: "5", + } + control := newBuiltMinReadyControl(t, deployment) + + err := control.Initialize(context.Background(), releaseDemo.DeepCopy()) + if err == nil || !strings.Contains(err.Error(), AnnotationOriginalProgressDeadlineSeconds) { + t.Fatalf("Initialize error = %v, want missing annotation error", err) + } +} + +func TestMinReadyInitializeRejectsEmptyOriginalAnnotations(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newMinReadyDeployment() + deployment.Annotations = map[string]string{ + AnnotationOriginalMinReadySeconds: "", + 
AnnotationOriginalProgressDeadlineSeconds: "30", + AnnotationOriginalMaxUnavailable: "10%", + } + control := newBuiltMinReadyControl(t, deployment) + + err := control.Initialize(context.Background(), releaseDemo.DeepCopy()) + if err == nil || !strings.Contains(err.Error(), "present but empty") { + t.Fatalf("Initialize error = %v, want empty annotation error", err) + } +} + +func TestMinReadyInitializeSerializesKubernetesDefaults(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newMinReadyDeployment() + deployment.Spec.MinReadySeconds = 0 + deployment.Spec.ProgressDeadlineSeconds = nil + deployment.Spec.Strategy.RollingUpdate = nil + control := newBuiltMinReadyControl(t, deployment) + + if err := control.Initialize(context.Background(), releaseDemo.DeepCopy()); err != nil { + t.Fatalf("Initialize failed: %v", err) + } + + got := fetchMinReadyDeployment(t, control) + assertAnnotation(t, got.Annotations, AnnotationOriginalMinReadySeconds, "0") + assertAnnotation(t, got.Annotations, AnnotationOriginalProgressDeadlineSeconds, "600") + assertAnnotation(t, got.Annotations, AnnotationOriginalMaxUnavailable, "25%") + assertMinReadyInflatedWithoutSurgeRequirement(t, got) +} + +func TestMinReadyInitializeRejectsFeatureGateDisabled(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=false") + control := newBuiltMinReadyControl(t, newMinReadyDeployment()) + + err := control.Initialize(context.Background(), releaseDemo.DeepCopy()) + if err == nil || !strings.Contains(err.Error(), "feature gate is disabled") { + t.Fatalf("Initialize error = %v, want feature gate disabled", err) + } +} + +func TestMinReadyInitializeAllowsCoveringPDB(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newMinReadyDeployment() + pdb := &policyv1.PodDisruptionBudget{ + ObjectMeta: 
metav1.ObjectMeta{Name: "demo-pdb", Namespace: deployment.Namespace}, + Spec: policyv1.PodDisruptionBudgetSpec{ + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "busybox"}}, + }, + } + control := newBuiltMinReadyControl(t, deployment, pdb) + + if err := control.Initialize(context.Background(), releaseDemo.DeepCopy()); err != nil { + t.Fatalf("Initialize failed: %v", err) + } +} + +func TestMinReadyUpgradeBatchUpdatesMaxUnavailableOnly(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newMinReadyDeployment() + control := newBuiltMinReadyControl(t, deployment) + if err := control.Initialize(context.Background(), releaseDemo.DeepCopy()); err != nil { + t.Fatalf("Initialize failed: %v", err) + } + control.object = fetchMinReadyDeployment(t, control) + ctx := &batchcontext.BatchContext{ + CurrentBatch: 1, + Replicas: 10, + DesiredUpdatedReplicas: 5, + } + + if err := control.UpgradeBatch(context.Background(), ctx); err != nil { + t.Fatalf("UpgradeBatch failed: %v", err) + } + + got := fetchMinReadyDeployment(t, control) + // Sliding window (P0-3): UpgradeBatch advances maxUnavailable one step + // (original 25% of 10 = 3) toward the batch target 5, not straight to 5. 
+ if unavailable := got.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.IntVal != 3 { + t.Fatalf("maxUnavailable = %v, want 3 (first sliding-window step)", unavailable) + } + if got.Spec.Strategy.Type != apps.RollingUpdateDeploymentStrategyType { + t.Fatalf("strategy.type = %q, want RollingUpdate", got.Spec.Strategy.Type) + } +} + +func TestMinReadyUpgradeBatchRejectsStrategyTypeDrift(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newInflatedMinReadyDeployment() + addMinReadyOriginalAnnotations(deployment) + deployment.Spec.Strategy.Type = apps.RecreateDeploymentStrategyType + control := newBuiltMinReadyControl(t, deployment) + ctx := &batchcontext.BatchContext{ + CurrentBatch: 1, + Replicas: 10, + DesiredUpdatedReplicas: 5, + } + + err := control.UpgradeBatch(context.Background(), ctx) + if err == nil || !strings.Contains(err.Error(), EventDegradedDriftDetected) { + t.Fatalf("UpgradeBatch error = %v, want strategy type drift detected", err) + } +} + +func TestMinReadyUpgradeBatchHealsPausedDrift(t *testing.T) { + // P0-2: a Deployment paused mid-rollout silently freezes the native + // controller. validateInflatedDeploymentStrategy now treats paused as drift, + // so ensureInflatedDeploymentStrategy re-inflates and clears spec.paused, + // actively unfreezing the workload instead of leaving it stuck without signal. + // (Recreate strategy-type drift is reported as degraded instead of healed, + // because Recreate may have already deleted pods destructively.) 
+ _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newInflatedMinReadyDeployment() + addMinReadyOriginalAnnotations(deployment) + deployment.Spec.Paused = true + control := newBuiltMinReadyControl(t, deployment) + ctx := &batchcontext.BatchContext{ + CurrentBatch: 1, + Replicas: 10, + DesiredUpdatedReplicas: 5, + } + + if err := control.UpgradeBatch(context.Background(), ctx); err != nil { + t.Fatalf("UpgradeBatch failed: %v", err) + } + + got := fetchMinReadyDeployment(t, control) + if got.Spec.Paused { + t.Fatalf("deployment still paused, want spec.paused=false after self-heal") + } + if got.Spec.MinReadySeconds != InflatedMinReadySeconds { + t.Fatalf("minReadySeconds = %d, want %d (re-inflated)", got.Spec.MinReadySeconds, InflatedMinReadySeconds) + } +} + +func TestMinReadyUpgradeBatchRestoresInflatedStrategyFields(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newInflatedMinReadyDeployment() + addMinReadyOriginalAnnotations(deployment) + deployment.Spec.MinReadySeconds = 7 + deployment.Spec.ProgressDeadlineSeconds = pointer.Int32(60) + deployment.Spec.Strategy.RollingUpdate = nil + control := newBuiltMinReadyControl(t, deployment) + ctx := &batchcontext.BatchContext{ + CurrentBatch: 1, + Replicas: 10, + DesiredUpdatedReplicas: 5, + } + + if err := control.UpgradeBatch(context.Background(), ctx); err != nil { + t.Fatalf("UpgradeBatch failed: %v", err) + } + + got := fetchMinReadyDeployment(t, control) + if got.Spec.MinReadySeconds != InflatedMinReadySeconds { + t.Fatalf("minReadySeconds = %d, want %d", got.Spec.MinReadySeconds, InflatedMinReadySeconds) + } + if got.Spec.ProgressDeadlineSeconds == nil || *got.Spec.ProgressDeadlineSeconds != InflatedProgressDeadlineSeconds { + t.Fatalf("progressDeadlineSeconds = %v, want %d", got.Spec.ProgressDeadlineSeconds, InflatedProgressDeadlineSeconds) + } + if 
got.Spec.Strategy.RollingUpdate == nil { + t.Fatalf("rollingUpdate is nil, want restored strategy") + } + // Sliding window (P0-3): after re-inflation maxUnavailable starts at 0, so + // UpgradeBatch advances it one step (25% of 10 = 3) toward target 5. + if unavailable := got.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.IntVal != 3 { + t.Fatalf("maxUnavailable = %v, want 3 (first sliding-window step)", unavailable) + } +} + +func TestMinReadyReconcileMaxUnavailableDriftConvergesExternalTampering(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newInflatedMinReadyDeployment() + addMinReadyOriginalAnnotations(deployment) + maxUnavailable := intstr.FromInt(5) + deployment.Spec.Strategy.RollingUpdate.MaxUnavailable = &maxUnavailable + control := newBuiltMinReadyControl(t, deployment) + ctx := &batchcontext.BatchContext{ + CurrentBatch: 0, + Replicas: 5, + DesiredUpdatedReplicas: 1, + } + + if err := control.ReconcileMaxUnavailableDrift(context.Background(), ctx); err != nil { + t.Fatalf("ReconcileMaxUnavailableDrift failed: %v", err) + } + + got := fetchMinReadyDeployment(t, control) + if value := minReadyMaxUnavailableValue(t, got, 5); value != 1 { + t.Fatalf("maxUnavailable = %d, want 1 (converged while batch is ready)", value) + } +} + +func TestMinReadyUpgradeBatchConvergesMaxUnavailableOnScaleDown(t *testing.T) { + // P1-2: after a scale-down (HPA or manual) the previously-set integer + // maxUnavailable can exceed the new batch target. This is a legal state, not + // external tampering, so UpgradeBatch must converge it back to the target + // instead of reporting degraded drift. 
+ _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newInflatedMinReadyDeployment() + addMinReadyOriginalAnnotations(deployment) + maxUnavailable := intstr.FromInt(8) + deployment.Spec.Strategy.RollingUpdate.MaxUnavailable = &maxUnavailable + control := newBuiltMinReadyControl(t, deployment) + ctx := &batchcontext.BatchContext{ + CurrentBatch: 1, + Replicas: 10, + DesiredUpdatedReplicas: 5, + } + + if err := control.UpgradeBatch(context.Background(), ctx); err != nil { + t.Fatalf("UpgradeBatch failed: %v", err) + } + + got := fetchMinReadyDeployment(t, control) + if value := minReadyMaxUnavailableValue(t, got, 10); value != 5 { + t.Fatalf("maxUnavailable = %d, want 5 (converged to target)", value) + } +} + +func TestMinReadyCalculateBatchContextUsesUpdatedReadyReplicas(t *testing.T) { + release := releaseDemo.DeepCopy() + release.Status.CanaryStatus.CurrentBatch = 1 + updateRevision := util.ComputeHash(&deploymentDemo.Spec.Template, nil) + release.Status.UpdateRevision = updateRevision + deployment := newMinReadyDeployment() + deployment.Status.Replicas = 10 + deployment.Status.UpdatedReplicas = 5 + deployment.Status.ReadyReplicas = 5 + addMinReadyOriginalAnnotations(deployment) + rs := newMinReadyReplicaSet(deployment, updateRevision, 5, 5) + pods := newMinReadyUpdatedPods(deployment, rs, updateRevision, "", 5, 5) + control := newBuiltMinReadyControl(t, deployment, appendPodObjects([]interface{}{rs}, pods)...) 
+ + ctx, err := control.CalculateBatchContext(release) + if err != nil { + t.Fatalf("CalculateBatchContext failed: %v", err) + } + + if ctx.DesiredUpdatedReplicas != 5 || ctx.PlannedUpdatedReplicas != 5 { + t.Fatalf("desired/planned = %d/%d, want 5/5", ctx.DesiredUpdatedReplicas, ctx.PlannedUpdatedReplicas) + } + if ctx.UpdatedReadyReplicas != 5 { + t.Fatalf("UpdatedReadyReplicas = %d, want updated available pods 5", ctx.UpdatedReadyReplicas) + } + if err := ctx.IsBatchReady(); err != nil { + t.Fatalf("IsBatchReady failed: %v", err) + } +} + +func TestMinReadyCalculateBatchContextIgnoresOldReadyPods(t *testing.T) { + release := releaseDemo.DeepCopy() + release.Status.CanaryStatus.CurrentBatch = 1 + updateRevision := util.ComputeHash(&deploymentDemo.Spec.Template, nil) + release.Status.UpdateRevision = updateRevision + deployment := newMinReadyDeployment() + deployment.Status.Replicas = 10 + deployment.Status.UpdatedReplicas = 5 + deployment.Status.ReadyReplicas = 10 + addMinReadyOriginalAnnotations(deployment) + rs := newMinReadyReplicaSet(deployment, updateRevision, 5, 1) + pods := newMinReadyUpdatedPods(deployment, rs, updateRevision, "", 5, 1) + control := newBuiltMinReadyControl(t, deployment, appendPodObjects([]interface{}{rs}, pods)...) 
+ + ctx, err := control.CalculateBatchContext(release) + if err != nil { + t.Fatalf("CalculateBatchContext failed: %v", err) + } + if ctx.UpdatedReadyReplicas != 1 { + t.Fatalf("UpdatedReadyReplicas = %d, want 1 from updated available pods only", ctx.UpdatedReadyReplicas) + } + if err := ctx.IsBatchReady(); err == nil { + t.Fatalf("IsBatchReady succeeded, want not ready error") + } +} + +func TestMinReadyCalculateBatchContextRequiresPodListingForRolloutID(t *testing.T) { + release := releaseDemo.DeepCopy() + release.Status.CanaryStatus.CurrentBatch = 1 + release.Spec.ReleasePlan.RolloutID = "rollout-1" + updateRevision := util.ComputeHash(&deploymentDemo.Spec.Template, nil) + release.Status.UpdateRevision = updateRevision + deployment := newMinReadyDeployment() + deployment.Status.Replicas = 10 + deployment.Status.UpdatedReplicas = 5 + deployment.Status.ReadyReplicas = 5 + addMinReadyOriginalAnnotations(deployment) + rs := newMinReadyReplicaSet(deployment, updateRevision, 5, 5) + control := newBuiltMinReadyControl(t, deployment, rs) + + ctx, err := control.CalculateBatchContext(release) + if err != nil { + t.Fatalf("CalculateBatchContext failed: %v", err) + } + if len(ctx.Pods) != 0 { + t.Fatalf("Pods = %d, want 0 when no pods exist in cluster", len(ctx.Pods)) + } + if err := ctx.IsBatchReady(); err == nil { + t.Fatalf("IsBatchReady succeeded, want batch label not satisfied") + } +} + +func TestMinReadyCalculateBatchContextCountsReadyPodsWhenListed(t *testing.T) { + release := releaseDemo.DeepCopy() + release.Status.CanaryStatus.CurrentBatch = 1 + release.Spec.ReleasePlan.RolloutID = "rollout-1" + updateRevision := util.ComputeHash(&deploymentDemo.Spec.Template, nil) + release.Status.UpdateRevision = updateRevision + deployment := newMinReadyDeployment() + deployment.Status.Replicas = 10 + deployment.Status.UpdatedReplicas = 5 + deployment.Status.ReadyReplicas = 10 + addMinReadyOriginalAnnotations(deployment) + rs := newMinReadyReplicaSet(deployment, 
updateRevision, 5, 1) + pods := newMinReadyUpdatedPods(deployment, rs, updateRevision, "rollout-1", 5, 3) + control := newBuiltMinReadyControl(t, deployment, rs, pods[0], pods[1], pods[2], pods[3], pods[4]) + + ctx, err := control.CalculateBatchContext(release) + if err != nil { + t.Fatalf("CalculateBatchContext failed: %v", err) + } + if len(ctx.Pods) != 5 { + t.Fatalf("Pods = %d, want 5 listed pods", len(ctx.Pods)) + } + if ctx.UpdatedReadyReplicas != 3 { + t.Fatalf("UpdatedReadyReplicas = %d, want 3 ready updated pods", ctx.UpdatedReadyReplicas) + } + if err := ctx.IsBatchReady(); err == nil { + t.Fatalf("IsBatchReady succeeded, want not ready error") + } +} + +func TestMinReadyCalculateBatchContextNotReady(t *testing.T) { + release := releaseDemo.DeepCopy() + release.Status.CanaryStatus.CurrentBatch = 1 + updateRevision := util.ComputeHash(&deploymentDemo.Spec.Template, nil) + release.Status.UpdateRevision = updateRevision + deployment := newMinReadyDeployment() + deployment.Status.Replicas = 10 + deployment.Status.UpdatedReplicas = 5 + deployment.Status.ReadyReplicas = 4 + addMinReadyOriginalAnnotations(deployment) + rs := newMinReadyReplicaSet(deployment, updateRevision, 5, 4) + pods := newMinReadyUpdatedPods(deployment, rs, updateRevision, "", 5, 4) + control := newBuiltMinReadyControl(t, deployment, appendPodObjects([]interface{}{rs}, pods)...) 
+ + ctx, err := control.CalculateBatchContext(release) + if err != nil { + t.Fatalf("CalculateBatchContext failed: %v", err) + } + if err := ctx.IsBatchReady(); err == nil { + t.Fatalf("IsBatchReady succeeded, want not ready error") + } +} + +func TestMinReadyCalculateBatchContextRecomputesAfterScaling(t *testing.T) { + release := releaseDemo.DeepCopy() + release.Status.CanaryStatus.CurrentBatch = 1 + updateRevision := util.ComputeHash(&deploymentDemo.Spec.Template, nil) + release.Status.UpdateRevision = updateRevision + deployment := newMinReadyDeployment() + deployment.Spec.Replicas = pointer.Int32(20) + deployment.Status.Replicas = 20 + deployment.Status.UpdatedReplicas = 10 + deployment.Status.ReadyReplicas = 10 + addMinReadyOriginalAnnotations(deployment) + rs := newMinReadyReplicaSet(deployment, updateRevision, 10, 10) + pods := newMinReadyUpdatedPods(deployment, rs, updateRevision, "", 10, 10) + control := newBuiltMinReadyControl(t, deployment, appendPodObjects([]interface{}{rs}, pods)...) 
+ + ctx, err := control.CalculateBatchContext(release) + if err != nil { + t.Fatalf("CalculateBatchContext failed: %v", err) + } + if ctx.DesiredUpdatedReplicas != 10 { + t.Fatalf("DesiredUpdatedReplicas = %d, want 10", ctx.DesiredUpdatedReplicas) + } +} + +func TestMinReadyCalculateBatchContextReplicasZero(t *testing.T) { + release := releaseDemo.DeepCopy() + release.Status.CanaryStatus.CurrentBatch = 1 + deployment := newMinReadyDeployment() + deployment.Spec.Replicas = pointer.Int32(0) + deployment.Status.Replicas = 0 + addMinReadyOriginalAnnotations(deployment) + control := newBuiltMinReadyControl(t, deployment) + + ctx, err := control.CalculateBatchContext(release) + if err != nil { + t.Fatalf("CalculateBatchContext failed: %v", err) + } + if ctx.DesiredUpdatedReplicas != 0 { + t.Fatalf("DesiredUpdatedReplicas = %d, want 0", ctx.DesiredUpdatedReplicas) + } +} + +func TestMinReadyFinalizeRestoresAfterGateDisabled(t *testing.T) { + // P1-4: even with the feature gate disabled, a Deployment carrying MinReady + // original annotations must finalize cleanly and restore the original fields. 
+ _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=false") + deployment := newInflatedMinReadyDeployment() + addMinReadyOriginalAnnotations(deployment) + control := newBuiltMinReadyControl(t, deployment) + + if err := control.Finalize(context.Background(), releaseDemo.DeepCopy()); err != nil { + t.Fatalf("Finalize failed: %v", err) + } + + got := fetchMinReadyDeployment(t, control) + if got.Spec.MinReadySeconds != 7 { + t.Fatalf("minReadySeconds = %d, want 7 (restored)", got.Spec.MinReadySeconds) + } + if got.Spec.ProgressDeadlineSeconds == nil || *got.Spec.ProgressDeadlineSeconds != 60 { + t.Fatalf("progressDeadlineSeconds = %v, want 60 (restored)", got.Spec.ProgressDeadlineSeconds) + } + if hasAnyOriginalAnnotation(got.Annotations) { + t.Fatalf("original annotations not cleaned up: %v", got.Annotations) + } +} + +func TestMinReadySlidingWindowAdvancesStepByStep(t *testing.T) { + // P0-3: a large batch target must not be written to maxUnavailable in a + // single patch. reconcileMaxUnavailable keeps at most the user's original + // maxUnavailable (25% of 10 = 3) worth of updated-but-not-ready pods in + // flight, topping up the window as individual pods become ready. 
+ _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newInflatedMinReadyDeployment() + addMinReadyOriginalAnnotations(deployment) + control := newBuiltMinReadyControl(t, deployment) + ctx := &batchcontext.BatchContext{ + CurrentBatch: 1, + Replicas: 10, + DesiredUpdatedReplicas: 9, + } + + steps := []struct { + ready int32 + wantMU int + comment string + }{ + {0, 3, "empty window advances to first step"}, + {1, 4, "one ready pod tops up one slot"}, + {2, 5, "partial readiness keeps topping up"}, + {4, 7, "does not wait for the whole current window"}, + {6, 9, "advance caps at target"}, + {9, 9, "at target holds"}, + } + for i, s := range steps { + ctx.UpdatedReadyReplicas = s.ready + if err := control.ReconcileMaxUnavailableDrift(context.Background(), ctx); err != nil { + t.Fatalf("step %d (%s): %v", i, s.comment, err) + } + if v := minReadyMaxUnavailableValue(t, fetchMinReadyDeployment(t, control), 10); v != s.wantMU { + t.Fatalf("step %d (%s): maxUnavailable = %d, want %d", i, s.comment, v, s.wantMU) + } + } +} + +func TestMinReadySlidingWindowReachesSmallTargetInOneStep(t *testing.T) { + // P0-3: when the batch target is within one step (target <= step), the + // first advance is capped at the target, so small batches complete in one + // reconcile instead of overshooting. 
+ _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newInflatedMinReadyDeployment() + addMinReadyOriginalAnnotations(deployment) + control := newBuiltMinReadyControl(t, deployment) + ctx := &batchcontext.BatchContext{ + CurrentBatch: 1, + Replicas: 10, + DesiredUpdatedReplicas: 2, + UpdatedReadyReplicas: 0, + } + if err := control.ReconcileMaxUnavailableDrift(context.Background(), ctx); err != nil { + t.Fatalf("drift reconcile failed: %v", err) + } + if v := minReadyMaxUnavailableValue(t, fetchMinReadyDeployment(t, control), 10); v != 2 { + t.Fatalf("maxUnavailable = %d, want 2 (small target reached in one step)", v) + } +} + +func TestMinReadySlidingWindowStepZeroDrivesBatchDirectly(t *testing.T) { + // P0-3: original maxUnavailable=0 means the user relies on maxSurge for + // concurrency control, so there is no budget to slide; the batch target is + // driven directly to preserve the existing surge-gated behavior. + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + deployment := newInflatedMinReadyDeployment() + deployment.Annotations = map[string]string{ + AnnotationOriginalMinReadySeconds: "7", + AnnotationOriginalProgressDeadlineSeconds: "60", + AnnotationOriginalMaxUnavailable: "0", + } + control := newBuiltMinReadyControl(t, deployment) + ctx := &batchcontext.BatchContext{ + CurrentBatch: 1, + Replicas: 10, + DesiredUpdatedReplicas: 5, + UpdatedReadyReplicas: 0, + } + if err := control.ReconcileMaxUnavailableDrift(context.Background(), ctx); err != nil { + t.Fatalf("drift reconcile failed: %v", err) + } + if v := minReadyMaxUnavailableValue(t, fetchMinReadyDeployment(t, control), 10); v != 5 { + t.Fatalf("maxUnavailable = %d, want 5 (step=0 drives batch directly)", v) + } +} diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_finalize_test.go 
b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_finalize_test.go new file mode 100644 index 00000000..28101ef1 --- /dev/null +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_finalize_test.go @@ -0,0 +1,158 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package deployment + +import ( + "context" + "strings" + "testing" + + appsv1alpha1 "github.com/openkruise/rollouts/api/v1alpha1" + "github.com/openkruise/rollouts/pkg/util" +) + +func TestMinReadyFinalizeRestoresOriginalValues(t *testing.T) { + deployment := newInflatedMinReadyDeployment() + deployment.Annotations = map[string]string{ + AnnotationOriginalMinReadySeconds: "7", + AnnotationOriginalProgressDeadlineSeconds: "60", + AnnotationOriginalMaxUnavailable: "25%", + util.BatchReleaseControlAnnotation: getControlInfo(releaseDemo), + } + deployment.Labels = map[string]string{ + appsv1alpha1.DeploymentStableRevisionLabel: "stable-revision", + } + control := newBuiltMinReadyControl(t, deployment) + + if err := control.Finalize(context.Background(), releaseDemo.DeepCopy()); err != nil { + t.Fatalf("Finalize failed: %v", err) + } + + got := fetchMinReadyDeployment(t, control) + if got.Spec.MinReadySeconds != 7 { + t.Fatalf("minReadySeconds = %d, want 7", got.Spec.MinReadySeconds) + } + if got.Spec.ProgressDeadlineSeconds == nil || *got.Spec.ProgressDeadlineSeconds != 60 { + t.Fatalf("progressDeadlineSeconds = %v, want 60", 
got.Spec.ProgressDeadlineSeconds) + } + if unavailable := got.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.StrVal != "25%" { + t.Fatalf("maxUnavailable = %v, want 25%%", unavailable) + } + for _, key := range AllOriginalAnnotations { + if _, ok := got.Annotations[key]; ok { + t.Fatalf("annotation %s still exists", key) + } + } + if _, ok := got.Annotations[util.BatchReleaseControlAnnotation]; ok { + t.Fatalf("annotation %s still exists", util.BatchReleaseControlAnnotation) + } + if _, ok := got.Labels[appsv1alpha1.DeploymentStableRevisionLabel]; ok { + t.Fatalf("label %s still exists", appsv1alpha1.DeploymentStableRevisionLabel) + } +} + +func TestMinReadyFinalizeRestoresKubernetesDefaults(t *testing.T) { + deployment := newInflatedMinReadyDeployment() + deployment.Annotations = map[string]string{ + AnnotationOriginalMinReadySeconds: "0", + AnnotationOriginalProgressDeadlineSeconds: "600", + AnnotationOriginalMaxUnavailable: "25%", + } + control := newBuiltMinReadyControl(t, deployment) + + if err := control.Finalize(context.Background(), releaseDemo.DeepCopy()); err != nil { + t.Fatalf("Finalize failed: %v", err) + } + + got := fetchMinReadyDeployment(t, control) + if got.Spec.MinReadySeconds != 0 { + t.Fatalf("minReadySeconds = %d, want 0", got.Spec.MinReadySeconds) + } + if got.Spec.ProgressDeadlineSeconds == nil || *got.Spec.ProgressDeadlineSeconds != DefaultProgressDeadlineSeconds { + t.Fatalf("progressDeadlineSeconds = %v, want %d", got.Spec.ProgressDeadlineSeconds, DefaultProgressDeadlineSeconds) + } + if got.Spec.Strategy.RollingUpdate == nil { + t.Fatalf("rollingUpdate = nil, want maxSurge preserved") + } + if unavailable := got.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.StrVal != DefaultMaxUnavailable { + t.Fatalf("maxUnavailable = %v, want %s", unavailable, DefaultMaxUnavailable) + } + if surge := got.Spec.Strategy.RollingUpdate.MaxSurge; surge == nil || surge.IntVal != 1 { + 
t.Fatalf("maxSurge = %v, want original value preserved", surge) + } +} + +func TestMinReadyFinalizeNoopWhenAnnotationsAbsentAndFieldsRestored(t *testing.T) { + deployment := newMinReadyDeployment() + deployment.Annotations = nil + control := newBuiltMinReadyControl(t, deployment) + + if err := control.Finalize(context.Background(), releaseDemo.DeepCopy()); err != nil { + t.Fatalf("Finalize failed: %v", err) + } + + got := fetchMinReadyDeployment(t, control) + if got.Spec.MinReadySeconds != 7 { + t.Fatalf("minReadySeconds = %d, want original value 7", got.Spec.MinReadySeconds) + } +} + +func TestMinReadyFinalizeRejectsMissingAnnotationsWhileFieldsInflated(t *testing.T) { + deployment := newInflatedMinReadyDeployment() + deployment.Annotations = nil + control := newBuiltMinReadyControl(t, deployment) + + err := control.Finalize(context.Background(), releaseDemo.DeepCopy()) + if err == nil || !strings.Contains(err.Error(), "annotation state missing") { + t.Fatalf("Finalize error = %v, want missing annotation state error", err) + } + + got := fetchMinReadyDeployment(t, control) + assertMinReadyInflated(t, got) +} + +func TestMinReadyFinalizeRejectsPartialAnnotations(t *testing.T) { + deployment := newInflatedMinReadyDeployment() + deployment.Annotations = map[string]string{ + AnnotationOriginalMinReadySeconds: "7", + } + control := newBuiltMinReadyControl(t, deployment) + + err := control.Finalize(context.Background(), releaseDemo.DeepCopy()) + if err == nil || !strings.Contains(err.Error(), AnnotationOriginalProgressDeadlineSeconds) { + t.Fatalf("Finalize error = %v, want missing annotation error", err) + } + got := fetchMinReadyDeployment(t, control) + assertMinReadyInflated(t, got) +} + +func TestMinReadyFinalizeRejectsMalformedAnnotations(t *testing.T) { + deployment := newInflatedMinReadyDeployment() + deployment.Annotations = map[string]string{ + AnnotationOriginalMinReadySeconds: "7", + AnnotationOriginalProgressDeadlineSeconds: "bad", + 
AnnotationOriginalMaxUnavailable: "25%", + } + control := newBuiltMinReadyControl(t, deployment) + + err := control.Finalize(context.Background(), releaseDemo.DeepCopy()) + if err == nil || !strings.Contains(err.Error(), "malformed int32") { + t.Fatalf("Finalize error = %v, want malformed int32 error", err) + } + got := fetchMinReadyDeployment(t, control) + assertMinReadyInflated(t, got) +} diff --git a/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_test_helpers_test.go b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_test_helpers_test.go new file mode 100644 index 00000000..ca3b01b5 --- /dev/null +++ b/pkg/controller/batchrelease/control/partitionstyle/deployment/minready_test_helpers_test.go @@ -0,0 +1,243 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package deployment + +import ( + "context" + "fmt" + "testing" + "time" + + apps "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + "github.com/openkruise/rollouts/api/v1beta1" +) + +func newMinReadyDeployment() *apps.Deployment { + progressDeadline := int32(60) + maxUnavailable := intstr.FromString("25%") + maxSurge := intstr.FromInt(1) + deployment := deploymentDemo.DeepCopy() + deployment.ResourceVersion = "1" + deployment.UID = types.UID("minready-deployment-uid") + deployment.Spec.MinReadySeconds = 7 + deployment.Spec.ProgressDeadlineSeconds = &progressDeadline + deployment.Spec.Strategy.Type = apps.RollingUpdateDeploymentStrategyType + deployment.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{ + MaxUnavailable: &maxUnavailable, + MaxSurge: &maxSurge, + } + return deployment +} + +func newInflatedMinReadyDeployment() *apps.Deployment { + deployment := newMinReadyDeployment() + inflateDeploymentStrategy(deployment) + return deployment +} + +func newBuiltMinReadyControl(t *testing.T, deployment *apps.Deployment, objs ...interface{}) *MinReadyControl { + t.Helper() + objects := []interface{}{deployment} + objects = append(objects, objs...) + builder := fake.NewClientBuilder().WithScheme(scheme).WithObjects(toClientObjects(t, objects)...) 
+ rc := NewController(builder.Build(), types.NamespacedName{ + Namespace: deployment.Namespace, + Name: deployment.Name, + }, deployment.GroupVersionKind()) + built, err := (&MinReadyControl{realController: rc.(*realController)}).BuildController() + if err != nil { + t.Fatalf("BuildController failed: %v", err) + } + return built.(*MinReadyControl) +} + +func toClientObjects(t *testing.T, objects []interface{}) []client.Object { + t.Helper() + result := make([]client.Object, 0, len(objects)) + for _, object := range objects { + typed, ok := object.(client.Object) + if !ok { + t.Fatalf("object %T does not implement client.Object", object) + } + result = append(result, typed) + } + return result +} + +func fetchMinReadyDeployment(t *testing.T, control *MinReadyControl) *apps.Deployment { + t.Helper() + got := &apps.Deployment{} + key := types.NamespacedName{Namespace: control.object.Namespace, Name: control.object.Name} + if err := control.client.Get(context.TODO(), key, got); err != nil { + t.Fatalf("Get deployment failed: %v", err) + } + return got +} + +func assertMinReadyInflated(t *testing.T, deployment *apps.Deployment) { + t.Helper() + if deployment.Spec.Strategy.Type != apps.RollingUpdateDeploymentStrategyType { + t.Fatalf("strategy.type = %q, want RollingUpdate", deployment.Spec.Strategy.Type) + } + if deployment.Spec.MinReadySeconds != InflatedMinReadySeconds { + t.Fatalf("minReadySeconds = %d, want %d", deployment.Spec.MinReadySeconds, InflatedMinReadySeconds) + } + if deployment.Spec.ProgressDeadlineSeconds == nil || *deployment.Spec.ProgressDeadlineSeconds != InflatedProgressDeadlineSeconds { + t.Fatalf("progressDeadlineSeconds = %v, want %d", deployment.Spec.ProgressDeadlineSeconds, InflatedProgressDeadlineSeconds) + } + if got := deployment.Spec.Strategy.RollingUpdate.MaxUnavailable; got == nil || got.IntVal != 0 { + t.Fatalf("maxUnavailable = %v, want 0", got) + } +} + +func assertMinReadyInflatedWithoutSurgeRequirement(t *testing.T, deployment 
*apps.Deployment) { + t.Helper() + if deployment.Spec.Strategy.Type != apps.RollingUpdateDeploymentStrategyType { + t.Fatalf("strategy.type = %q, want RollingUpdate", deployment.Spec.Strategy.Type) + } + if deployment.Spec.MinReadySeconds != InflatedMinReadySeconds { + t.Fatalf("minReadySeconds = %d, want %d", deployment.Spec.MinReadySeconds, InflatedMinReadySeconds) + } + if deployment.Spec.ProgressDeadlineSeconds == nil || *deployment.Spec.ProgressDeadlineSeconds != InflatedProgressDeadlineSeconds { + t.Fatalf("progressDeadlineSeconds = %v, want %d", deployment.Spec.ProgressDeadlineSeconds, InflatedProgressDeadlineSeconds) + } + if got := deployment.Spec.Strategy.RollingUpdate.MaxUnavailable; got == nil || got.IntVal != 0 { + t.Fatalf("maxUnavailable = %v, want 0", got) + } +} + +func addMinReadyOriginalAnnotations(deployment *apps.Deployment) { + if deployment.Annotations == nil { + deployment.Annotations = map[string]string{} + } + deployment.Annotations[AnnotationOriginalMinReadySeconds] = "7" + deployment.Annotations[AnnotationOriginalProgressDeadlineSeconds] = "60" + deployment.Annotations[AnnotationOriginalMaxUnavailable] = "25%" +} + +func appendPodObjects(objects []interface{}, pods []*corev1.Pod) []interface{} { + for _, pod := range pods { + objects = append(objects, pod) + } + return objects +} + +func assertAnnotation(t *testing.T, annotations map[string]string, key, want string) { + t.Helper() + if got := annotations[key]; got != want { + t.Fatalf("annotation %s = %q, want %q", key, got, want) + } +} + +func newMinReadyReplicaSet(deployment *apps.Deployment, updateRevision string, replicas, readyReplicas int32) *apps.ReplicaSet { + return &apps.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-%s", deployment.Name, updateRevision), + Namespace: deployment.Namespace, + UID: types.UID(fmt.Sprintf("rs-%s-%s", deployment.Name, updateRevision)), + Labels: map[string]string{ + apps.DefaultDeploymentUniqueLabelKey: updateRevision, + }, + 
OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: apps.SchemeGroupVersion.String(), + Kind: "Deployment", + Name: deployment.Name, + UID: deployment.UID, + Controller: pointerBool(true), + }, + }, + }, + Spec: apps.ReplicaSetSpec{ + Replicas: pointerInt32(replicas), + Selector: deployment.Spec.Selector.DeepCopy(), + Template: deployment.Spec.Template, + }, + Status: apps.ReplicaSetStatus{ + Replicas: replicas, + ReadyReplicas: readyReplicas, + }, + } +} + +func newMinReadyUpdatedPods(deployment *apps.Deployment, rs *apps.ReplicaSet, updateRevision, rolloutID string, total, ready int) []*corev1.Pod { + pods := make([]*corev1.Pod, 0, total) + for i := 0; i < total; i++ { + readyCondition := corev1.ConditionFalse + if i < ready { + readyCondition = corev1.ConditionTrue + } + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-pod-%d", deployment.Name, i), + Namespace: deployment.Namespace, + Labels: map[string]string{ + "app": "busybox", + apps.DefaultDeploymentUniqueLabelKey: updateRevision, + }, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: apps.SchemeGroupVersion.String(), + Kind: "ReplicaSet", + Name: rs.Name, + UID: rs.UID, + Controller: pointerBool(true), + }, + }, + }, + Status: corev1.PodStatus{ + Conditions: []corev1.PodCondition{{ + Type: corev1.PodReady, + Status: readyCondition, + LastTransitionTime: metav1.NewTime(time.Now().Add(-10 * time.Second)), + }}, + }, + } + if rolloutID != "" { + pod.Labels[v1beta1.RolloutIDLabel] = rolloutID + } + pods = append(pods, pod) + } + return pods +} + +func pointerInt32(v int32) *int32 { + return &v +} + +func pointerBool(v bool) *bool { + return &v +} + +func minReadyMaxUnavailableValue(t *testing.T, deployment *apps.Deployment, replicas int32) int { + t.Helper() + if deployment.Spec.Strategy.RollingUpdate == nil || deployment.Spec.Strategy.RollingUpdate.MaxUnavailable == nil { + t.Fatalf("rollingUpdate.maxUnavailable is nil") + } + value, err := 
intstr.GetScaledValueFromIntOrPercent(deployment.Spec.Strategy.RollingUpdate.MaxUnavailable, int(replicas), true) + if err != nil { + t.Fatalf("scaled maxUnavailable failed: %v", err) + } + return value +} diff --git a/pkg/controller/batchrelease/control/partitionstyle/interface.go b/pkg/controller/batchrelease/control/partitionstyle/interface.go index 90117785..d15095bd 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/interface.go +++ b/pkg/controller/batchrelease/control/partitionstyle/interface.go @@ -17,6 +17,8 @@ limitations under the License. package partitionstyle import ( + "context" + corev1 "k8s.io/api/core/v1" "github.com/openkruise/rollouts/api/v1beta1" @@ -40,11 +42,11 @@ type Interface interface { // Initialize do something before rolling out, for example: // - claim the workload is under our control; // - other things related with specific type of workload, such as 100% partition settings. - Initialize(release *v1beta1.BatchRelease) error + Initialize(ctx context.Context, release *v1beta1.BatchRelease) error // UpgradeBatch upgrade workload according current batch context. - UpgradeBatch(ctx *batchcontext.BatchContext) error + UpgradeBatch(ctx context.Context, batchContext *batchcontext.BatchContext) error // Finalize do something after rolling out, for example: // - free the stable workload from rollout control; // - resume workload if we need. - Finalize(release *v1beta1.BatchRelease) error + Finalize(ctx context.Context, release *v1beta1.BatchRelease) error } diff --git a/pkg/controller/batchrelease/control/partitionstyle/minready_errors.go b/pkg/controller/batchrelease/control/partitionstyle/minready_errors.go new file mode 100644 index 00000000..d27d0709 --- /dev/null +++ b/pkg/controller/batchrelease/control/partitionstyle/minready_errors.go @@ -0,0 +1,37 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package partitionstyle + +import "errors" + +// Sentinel errors used to classify MinReady degraded conditions into stable +// Prometheus metric labels and event reasons. Producers must wrap them with +// %w so that classification relies on errors.Is instead of message text. +var ( + // ErrMinReadyFeatureGateDisabled indicates the MinReadySecondsStrategy + // feature gate is disabled while a MinReady operation was requested. + ErrMinReadyFeatureGateDisabled = errors.New("feature gate is disabled") + + // ErrMinReadyAnnotationInvalid covers missing, empty or malformed + // MinReady original-strategy annotations. + ErrMinReadyAnnotationInvalid = errors.New("original annotation invalid") + + // ErrMinReadyDriftDetected indicates the inflated Deployment fields were + // changed externally (GitOps reconcile, manual kubectl, etc.). Its text + // doubles as the warning event reason. + ErrMinReadyDriftDetected = errors.New("MinReadyDegradedDriftDetected") +) diff --git a/pkg/controller/batchrelease/control/partitionstyle/minready_status.go b/pkg/controller/batchrelease/control/partitionstyle/minready_status.go new file mode 100644 index 00000000..b56a9890 --- /dev/null +++ b/pkg/controller/batchrelease/control/partitionstyle/minready_status.go @@ -0,0 +1,188 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package partitionstyle + +import ( + "context" + "errors" + "time" + + v1 "k8s.io/api/core/v1" + "k8s.io/client-go/tools/record" + + "github.com/openkruise/rollouts/api/v1beta1" + batchcontext "github.com/openkruise/rollouts/pkg/controller/batchrelease/context" + brmetrics "github.com/openkruise/rollouts/pkg/controller/batchrelease/metrics" + "github.com/openkruise/rollouts/pkg/util" +) + +// MinReadyStatusBinder injects BatchRelease status/event dependencies into +// MinReadyControl before lifecycle methods run. +type MinReadyStatusBinder interface { + BindMinReadyStatus(release *v1beta1.BatchRelease, status *v1beta1.BatchReleaseStatus, recorder record.EventRecorder) +} + +// MinReadyLifecycle records MinReady-specific status from control-plane batch +// paths that are not Initialize/UpgradeBatch/Finalize. +type MinReadyLifecycle interface { + RecordZeroReplicaBatching() + RecordBatchAdvanced() + RecordZeroReplicaBatchReady() + RecordBatchReady() + ObserveBatchWait() + RecordOperationFailed(reason string, err error) + RecordInitialized() + RecordFinalized() +} + +// MinReadyDriftReconciler converges inflated maxUnavailable back to the active +// batch target. EnsureBatchPodsReadyAndLabeled calls it so external drift is +// healed even while BatchRelease waits in ReadyBatchState for rollout resume. 
+type MinReadyDriftReconciler interface { + ReconcileMaxUnavailableDrift(ctx context.Context, batchContext *batchcontext.BatchContext) error +} + +type MinReadyStatusWriter struct { + release *v1beta1.BatchRelease + status *v1beta1.BatchReleaseStatus + recorder record.EventRecorder +} + +func NewMinReadyStatusWriter(release *v1beta1.BatchRelease, status *v1beta1.BatchReleaseStatus, recorder record.EventRecorder) *MinReadyStatusWriter { + return &MinReadyStatusWriter{ + release: release, + status: status, + recorder: recorder, + } +} + +func (w *MinReadyStatusWriter) BatchRelease() *v1beta1.BatchRelease { + if w == nil { + return nil + } + return w.release +} + +func (w *MinReadyStatusWriter) BatchReleaseStatus() *v1beta1.BatchReleaseStatus { + if w == nil { + return nil + } + return w.status +} + +func (w *MinReadyStatusWriter) RecordNormal(condType v1beta1.RolloutConditionType, reason, message string) { + if w == nil || w.status == nil { + return + } + previousCondition := util.GetBatchReleaseCondition(*w.status, condType) + condition := util.NewRolloutCondition(condType, v1.ConditionTrue, reason, message) + util.SetBatchReleaseCondition(w.status, *condition) + if reason == "MinReadyFinalized" { + clearMinReadyDegraded(w.status) + w.status.Message = "" + } + if reason == "MinReadyBatchReady" { + observeMinReadyBatchDuration(w.release, previousCondition) + brmetrics.RecordMinReadyBatch(w.release, brmetrics.BatchResultSuccess) + } + if reason == "MinReadyBatchReady" || reason == "MinReadyFinalized" { + brmetrics.ClearMinReadyStuckSeconds(w.release, brmetrics.StuckReasonBatchReadyTimeout) + } + if w.recorder != nil && w.release != nil { + w.recorder.Event(w.release, v1.EventTypeNormal, reason, message) + } +} + +func (w *MinReadyStatusWriter) RecordDegraded(reason string, err error) { + if w == nil || w.status == nil || err == nil { + return + } + message := err.Error() + classified := classifyMinReadyDegradedReason(reason, err) + eventReason := classified.event + 
condition := util.NewRolloutCondition(v1beta1.RolloutConditionMinReadyDegraded, v1.ConditionTrue, eventReason, message) + util.SetBatchReleaseCondition(w.status, *condition) + w.status.Message = message + degradedReason := classified.metric + brmetrics.ClearMinReadyStuckSeconds(w.release, brmetrics.StuckReasonBatchReadyTimeout) + brmetrics.RecordMinReadyBatch(w.release, brmetrics.BatchResultDegraded) + brmetrics.RecordMinReadyDegraded(w.release, degradedReason) + if w.recorder != nil && w.release != nil { + w.recorder.Event(w.release, v1.EventTypeWarning, eventReason, message) + } +} + +func observeMinReadyBatchDuration(release *v1beta1.BatchRelease, condition *v1beta1.RolloutCondition) { + if release == nil || condition == nil || condition.LastTransitionTime.IsZero() { + return + } + duration := time.Since(condition.LastTransitionTime.Time) + if duration < 0 { + return + } + brmetrics.ObserveMinReadyBatchDuration(release, duration) +} + +// ObserveMinReadyBatchWait updates the stuck-seconds metric while a batch waits to become ready. +func ObserveMinReadyBatchWait(release *v1beta1.BatchRelease, condition *v1beta1.RolloutCondition) { + if release == nil || condition == nil || condition.LastTransitionTime.IsZero() { + return + } + duration := time.Since(condition.LastTransitionTime.Time) + if duration < 0 { + return + } + brmetrics.SetMinReadyStuckSeconds(release, brmetrics.StuckReasonBatchReadyTimeout, duration.Seconds()) +} + +func clearMinReadyDegraded(status *v1beta1.BatchReleaseStatus) { + condition := util.NewRolloutCondition(v1beta1.RolloutConditionMinReadyDegraded, v1.ConditionFalse, "MinReadyHealthy", "") + util.SetBatchReleaseCondition(status, *condition) +} + +type minReadyDegradedReason struct { + metric string + event string +} + +// classifyMinReadyDegradedReason maps a degraded error onto a stable metric +// label and event reason via errors.Is, so the classification does not depend +// on human-readable error text. 
Producers wrap the sentinels in minready_errors.go +// with %w; fallback is used as the event reason for unclassified errors. +func classifyMinReadyDegradedReason(fallback string, err error) minReadyDegradedReason { + switch { + case errors.Is(err, ErrMinReadyFeatureGateDisabled): + return minReadyDegradedReason{ + metric: brmetrics.DegradedReasonFeatureGateDisabled, + event: "MinReadyFeatureGateDisabled", + } + case errors.Is(err, ErrMinReadyAnnotationInvalid): + return minReadyDegradedReason{ + metric: brmetrics.DegradedReasonMissingAnnotations, + event: "MinReadyDegradedMissingAnnotations", + } + case errors.Is(err, ErrMinReadyDriftDetected): + return minReadyDegradedReason{ + metric: brmetrics.DegradedReasonGitOpsDrift, + event: "MinReadyDegradedDriftDetected", + } + } + return minReadyDegradedReason{ + metric: brmetrics.DegradedReasonControllerError, + event: fallback, + } +} diff --git a/pkg/controller/batchrelease/control/partitionstyle/minready_status_test.go b/pkg/controller/batchrelease/control/partitionstyle/minready_status_test.go new file mode 100644 index 00000000..8e3fd5e9 --- /dev/null +++ b/pkg/controller/batchrelease/control/partitionstyle/minready_status_test.go @@ -0,0 +1,246 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package partitionstyle + +import ( + "errors" + "fmt" + "testing" + "time" + + dto "github.com/prometheus/client_model/go" + apps "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/tools/record" + ctrlmetrics "sigs.k8s.io/controller-runtime/pkg/metrics" + + "github.com/openkruise/rollouts/api/v1beta1" + brmetrics "github.com/openkruise/rollouts/pkg/controller/batchrelease/metrics" + "github.com/openkruise/rollouts/pkg/feature" + "github.com/openkruise/rollouts/pkg/util" + utilfeature "github.com/openkruise/rollouts/pkg/util/feature" +) + +func TestRecordMinReadyNormalObservesBatchDuration(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + release := &v1beta1.BatchRelease{ + ObjectMeta: metav1.ObjectMeta{Name: "duration-rollout", Namespace: "default"}, + Spec: v1beta1.BatchReleaseSpec{ + WorkloadRef: v1beta1.ObjectRef{APIVersion: apps.SchemeGroupVersion.String(), Kind: "Deployment", Name: "demo"}, + ReleasePlan: v1beta1.ReleasePlan{RollingStyle: v1beta1.PartitionRollingStyle}, + }, + } + status := &v1beta1.BatchReleaseStatus{} + startedAt := metav1.NewTime(time.Now().Add(-3 * time.Second)) + util.SetBatchReleaseCondition(status, v1beta1.RolloutCondition{ + Type: v1beta1.RolloutConditionMinReadyBatching, + Status: v1.ConditionTrue, + Reason: "MinReadyBatching", + Message: "MinReadySeconds strategy advanced the current batch", + LastTransitionTime: startedAt, + LastUpdateTime: startedAt, + }) + + rc := &MinReadyStatusWriter{ + release: release, + status: status, + recorder: record.NewFakeRecorder(1), + } + + rc.RecordNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatchReady", "MinReadySeconds strategy batch is ready") + + histogram := findHistogramMetric(t, "rollout_minready_batch_duration_seconds", map[string]string{ + "rollout": release.Name, + "namespace": release.Namespace, + }) + if histogram.GetSampleCount() == 0 { + 
t.Fatalf("histogram sample count = %d, want > 0", histogram.GetSampleCount()) + } + if status.Message != "" { + t.Fatalf("status.message = %q, want empty", status.Message) + } +} + +func TestRecordMinReadyNormalKeepsDegradedUntilFinalize(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + release := &v1beta1.BatchRelease{ + ObjectMeta: metav1.ObjectMeta{Name: "degraded-rollout", Namespace: "default"}, + Spec: v1beta1.BatchReleaseSpec{ + WorkloadRef: v1beta1.ObjectRef{APIVersion: apps.SchemeGroupVersion.String(), Kind: "Deployment", Name: "demo"}, + ReleasePlan: v1beta1.ReleasePlan{RollingStyle: v1beta1.PartitionRollingStyle}, + }, + } + status := &v1beta1.BatchReleaseStatus{Message: "annotation missing"} + util.SetBatchReleaseCondition(status, v1beta1.RolloutCondition{ + Type: v1beta1.RolloutConditionMinReadyDegraded, + Status: v1.ConditionTrue, + Reason: "MinReadyDegradedMissingAnnotations", + }) + rc := &MinReadyStatusWriter{ + release: release, + status: status, + recorder: record.NewFakeRecorder(2), + } + + rc.RecordNormal(v1beta1.RolloutConditionMinReadyBatching, "MinReadyBatching", "MinReadySeconds strategy advanced the current batch") + + degraded := util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyDegraded) + if degraded == nil || degraded.Status != v1.ConditionTrue { + t.Fatalf("degraded condition = %v, want still true after batching", degraded) + } + if status.Message != "annotation missing" { + t.Fatalf("status.message = %q, want previous degraded message", status.Message) + } + + rc.RecordNormal(v1beta1.RolloutConditionMinReadyFinalized, "MinReadyFinalized", "MinReadySeconds strategy finalized") + + degraded = util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyDegraded) + if degraded == nil || degraded.Status != v1.ConditionFalse { + t.Fatalf("degraded condition = %v, want false after finalize", degraded) + } + if status.Message != "" { + 
t.Fatalf("status.message = %q, want empty after finalize", status.Message) + } +} + +func TestObserveMinReadyBatchWaitSetsStuckGauge(t *testing.T) { + release := &v1beta1.BatchRelease{ + ObjectMeta: metav1.ObjectMeta{Name: "stuck-rollout", Namespace: "default"}, + } + startedAt := metav1.NewTime(time.Now().Add(-4 * time.Second)) + condition := &v1beta1.RolloutCondition{ + Type: v1beta1.RolloutConditionMinReadyBatching, + Status: v1.ConditionTrue, + Reason: "MinReadyBatching", + Message: "MinReadySeconds strategy advanced the current batch", + LastTransitionTime: startedAt, + LastUpdateTime: startedAt, + } + + ObserveMinReadyBatchWait(release, condition) + + gauge := findGaugeMetric(t, "rollout_minready_stuck_seconds", map[string]string{ + "rollout": release.Name, + "namespace": release.Namespace, + "reason": "batch_ready_timeout", + }) + if gauge.GetValue() <= 0 { + t.Fatalf("gauge value = %v, want > 0", gauge.GetValue()) + } +} + +func TestClassifyMinReadyDegradedReason(t *testing.T) { + cases := []struct { + name string + err error + metric string + event string + }{ + { + name: "drift", + err: fmt.Errorf("MinReadyControl.UpgradeBatch[1]: %w: maxUnavailable=3 exceeds target=2", ErrMinReadyDriftDetected), + metric: brmetrics.DegradedReasonGitOpsDrift, + event: "MinReadyDegradedDriftDetected", + }, + { + name: "feature gate disabled", + err: fmt.Errorf("MinReadyControl.Initialize: %w", ErrMinReadyFeatureGateDisabled), + metric: brmetrics.DegradedReasonFeatureGateDisabled, + event: "MinReadyFeatureGateDisabled", + }, + { + name: "annotation invalid", + err: fmt.Errorf("annotation foo missing: %w", ErrMinReadyAnnotationInvalid), + metric: brmetrics.DegradedReasonMissingAnnotations, + event: "MinReadyDegradedMissingAnnotations", + }, + { + name: "unclassified falls back", + err: errors.New("some controller error"), + metric: brmetrics.DegradedReasonControllerError, + event: "MinReadyBatchingFailed", + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t 
*testing.T) { + got := classifyMinReadyDegradedReason("MinReadyBatchingFailed", tc.err) + if got.metric != tc.metric { + t.Fatalf("metric reason = %q, want %q", got.metric, tc.metric) + } + if got.event != tc.event { + t.Fatalf("event reason = %q, want %q", got.event, tc.event) + } + }) + } +} + +func findHistogramMetric(t *testing.T, name string, labels map[string]string) *dto.Histogram { + t.Helper() + families, err := ctrlmetrics.Registry.Gather() + if err != nil { + t.Fatalf("gather metrics failed: %v", err) + } + for _, family := range families { + if family.GetName() != name { + continue + } + for _, metric := range family.GetMetric() { + if metricLabelsMatch(metric, labels) { + return metric.GetHistogram() + } + } + } + t.Fatalf("histogram %s with labels %v not found", name, labels) + return nil +} + +func findGaugeMetric(t *testing.T, name string, labels map[string]string) *dto.Gauge { + t.Helper() + families, err := ctrlmetrics.Registry.Gather() + if err != nil { + t.Fatalf("gather metrics failed: %v", err) + } + for _, family := range families { + if family.GetName() != name { + continue + } + for _, metric := range family.GetMetric() { + if metricLabelsMatch(metric, labels) { + return metric.GetGauge() + } + } + } + t.Fatalf("gauge %s with labels %v not found", name, labels) + return nil +} + +func metricLabelsMatch(metric *dto.Metric, labels map[string]string) bool { + for key, want := range labels { + matched := false + for _, pair := range metric.GetLabel() { + if pair.GetName() == key && pair.GetValue() == want { + matched = true + break + } + } + if !matched { + return false + } + } + return true +} diff --git a/pkg/controller/batchrelease/control/partitionstyle/nativedaemonset/control.go b/pkg/controller/batchrelease/control/partitionstyle/nativedaemonset/control.go index 9cc890fe..9c25a336 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/nativedaemonset/control.go +++ 
b/pkg/controller/batchrelease/control/partitionstyle/nativedaemonset/control.go @@ -94,7 +94,7 @@ func (rc *realController) ListOwnedPods() ([]*corev1.Pod, error) { } // Initialize prepares the native DaemonSet for batch release by setting the appropriate update strategy. -func (rc *realController) Initialize(release *v1beta1.BatchRelease) error { +func (rc *realController) Initialize(ctx context.Context, release *v1beta1.BatchRelease) error { if control.IsControlledByBatchRelease(release, rc.object) { return nil } @@ -127,21 +127,21 @@ func (rc *realController) Initialize(release *v1beta1.BatchRelease) error { return fmt.Errorf("failed to marshal patch: %v", err) } - return rc.client.Patch(context.TODO(), daemon, client.RawPatch(types.MergePatchType, patchBytes)) + return rc.client.Patch(ctx, daemon, client.RawPatch(types.MergePatchType, patchBytes)) } // UpgradeBatch handles the batch upgrade for native DaemonSet by managing annotations. // The actual pod deletion is handled by the advanced-daemonset-controller. 
-func (rc *realController) UpgradeBatch(ctx *batchcontext.BatchContext) error { +func (rc *realController) UpgradeBatch(ctx context.Context, batchContext *batchcontext.BatchContext) error { // Check if the DaemonSet already has the partition annotation currentPartitionStr, _ := util.ParseDaemonSetAdvancedControl(rc.object.Annotations) - desiredPartitionStr := ctx.DesiredPartition.String() + desiredPartitionStr := batchContext.DesiredPartition.String() // If annotation is missing or doesn't equal desired value, patch the DaemonSet if currentPartitionStr != desiredPartitionStr { klog.Infof("Updating partition annotation for DaemonSet %s/%s: %s -> %s", rc.object.Namespace, rc.object.Name, currentPartitionStr, desiredPartitionStr) - return rc.patchBatchAnnotations(ctx) + return rc.patchBatchAnnotations(ctx, batchContext) } // Partition annotation already matches desired value, no action needed @@ -151,10 +151,10 @@ func (rc *realController) UpgradeBatch(ctx *batchcontext.BatchContext) error { } // patchBatchAnnotations patches the DaemonSet with batch control annotations -func (rc *realController) patchBatchAnnotations(ctx *batchcontext.BatchContext) error { +func (rc *realController) patchBatchAnnotations(ctx context.Context, batchContext *batchcontext.BatchContext) error { // Use SetDaemonSetAdvancedControl to set annotations annotations := make(map[string]string) - util.SetDaemonSetAdvancedControl(annotations, ctx.DesiredPartition.String(), ctx.UpdateRevision) + util.SetDaemonSetAdvancedControl(annotations, batchContext.DesiredPartition.String(), batchContext.UpdateRevision) // Create patch with batch annotations patch := map[string]interface{}{ @@ -169,11 +169,11 @@ func (rc *realController) patchBatchAnnotations(ctx *batchcontext.BatchContext) } daemon := util.GetEmptyObjectWithKey(rc.object) - return rc.client.Patch(context.TODO(), daemon, client.RawPatch(types.MergePatchType, patchBytes)) + return rc.client.Patch(ctx, daemon, 
client.RawPatch(types.MergePatchType, patchBytes)) } // Finalize cleans up the annotations and restores the original update strategy. -func (rc *realController) Finalize(release *v1beta1.BatchRelease) error { +func (rc *realController) Finalize(ctx context.Context, release *v1beta1.BatchRelease) error { if rc.object == nil { return nil } @@ -205,7 +205,7 @@ func (rc *realController) Finalize(release *v1beta1.BatchRelease) error { daemon := util.GetEmptyObjectWithKey(rc.object) - return rc.client.Patch(context.TODO(), daemon, client.RawPatch(types.MergePatchType, []byte(body))) + return rc.client.Patch(ctx, daemon, client.RawPatch(types.MergePatchType, []byte(body))) } // CalculateBatchContext calculates the batch context for native DaemonSet. diff --git a/pkg/controller/batchrelease/control/partitionstyle/nativedaemonset/control_test.go b/pkg/controller/batchrelease/control/partitionstyle/nativedaemonset/control_test.go index c4b3bdc5..5fe15a08 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/nativedaemonset/control_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/nativedaemonset/control_test.go @@ -433,7 +433,7 @@ func TestInitialize(t *testing.T) { controller := NewController(cli, key, gvk) builtController, _ := controller.BuildController() - err := builtController.Initialize(batchReleaseDemo) + err := builtController.Initialize(context.Background(), batchReleaseDemo) assert.NoError(t, err) // Verify the DaemonSet was updated correctly @@ -467,7 +467,7 @@ func TestInitializeAlreadyControlled(t *testing.T) { controller := NewController(cli, key, gvk) builtController, _ := controller.BuildController() - err := builtController.Initialize(batchReleaseDemo) + err := builtController.Initialize(context.Background(), batchReleaseDemo) assert.NoError(t, err) // Verify the DaemonSet was not changed @@ -497,7 +497,7 @@ func TestInitializeWithRollingUpdateStrategy(t *testing.T) { controller := NewController(cli, key, gvk) builtController, _ := 
controller.BuildController() - err := builtController.Initialize(batchReleaseDemo) + err := builtController.Initialize(context.Background(), batchReleaseDemo) assert.NoError(t, err) // Verify the DaemonSet was updated correctly @@ -540,7 +540,7 @@ func TestInitialize_PatchError(t *testing.T) { rc.object = daemon rc.WorkloadInfo = util.ParseWorkload(daemon) - err := rc.Initialize(batchReleaseDemo) + err := rc.Initialize(context.Background(), batchReleaseDemo) assert.Error(t, err) // Should fail because daemon doesn't exist in client } @@ -568,7 +568,7 @@ func TestUpgradeBatchFirstTime(t *testing.T) { Replicas: 5, } - err := builtController.UpgradeBatch(ctx) + err := builtController.UpgradeBatch(context.Background(), ctx) assert.NoError(t, err) // Verify the DaemonSet has the batch annotations with JSON format @@ -611,7 +611,7 @@ func TestUpgradeBatchSamePartition(t *testing.T) { Replicas: 5, } - err := builtController.UpgradeBatch(ctx) + err := builtController.UpgradeBatch(context.Background(), ctx) assert.NoError(t, err) // Verify the DaemonSet annotations remain unchanged (no additional patch call) @@ -651,7 +651,7 @@ func TestUpgradeBatchDifferentPartition(t *testing.T) { Replicas: 5, } - err := builtController.UpgradeBatch(ctx) + err := builtController.UpgradeBatch(context.Background(), ctx) assert.NoError(t, err) // Verify the DaemonSet annotations are updated to the new partition @@ -693,7 +693,7 @@ func TestPatchBatchAnnotations_PatchError(t *testing.T) { UpdateRevision: "update-revision-123", } - err := rc.patchBatchAnnotations(ctx) + err := rc.patchBatchAnnotations(context.Background(), ctx) assert.Error(t, err) // Should fail because daemon doesn't exist in client } @@ -719,7 +719,7 @@ func TestFinalizeWithBatchPartitionNil(t *testing.T) { completedBatchRelease := batchReleaseDemo.DeepCopy() completedBatchRelease.Spec.ReleasePlan.BatchPartition = nil - err := builtController.Finalize(completedBatchRelease) + err := 
builtController.Finalize(context.Background(), completedBatchRelease) assert.NoError(t, err) // Verify the DaemonSet was updated correctly @@ -762,7 +762,7 @@ func TestFinalizeWithOriginalRollingUpdateStrategy(t *testing.T) { completedBatchRelease := batchReleaseDemo.DeepCopy() completedBatchRelease.Spec.ReleasePlan.BatchPartition = nil - err := builtController.Finalize(completedBatchRelease) + err := builtController.Finalize(context.Background(), completedBatchRelease) assert.NoError(t, err) // Verify the DaemonSet was updated correctly @@ -807,7 +807,7 @@ func TestFinalizeWithOriginalOnDeleteStrategy(t *testing.T) { completedBatchRelease := batchReleaseDemo.DeepCopy() completedBatchRelease.Spec.ReleasePlan.BatchPartition = nil - err := builtController.Finalize(completedBatchRelease) + err := builtController.Finalize(context.Background(), completedBatchRelease) assert.NoError(t, err) // Verify the DaemonSet was updated correctly @@ -852,7 +852,7 @@ func TestFinalizeWithMissingOriginalStrategy(t *testing.T) { completedBatchRelease := batchReleaseDemo.DeepCopy() completedBatchRelease.Spec.ReleasePlan.BatchPartition = nil - err := builtController.Finalize(completedBatchRelease) + err := builtController.Finalize(context.Background(), completedBatchRelease) assert.NoError(t, err) // Verify the DaemonSet was updated correctly @@ -896,7 +896,7 @@ func TestFinalizeWithBatchPartitionNotNil(t *testing.T) { batchPartition := int32(1) inProgressBatchRelease.Spec.ReleasePlan.BatchPartition = &batchPartition - err := builtController.Finalize(inProgressBatchRelease) + err := builtController.Finalize(context.Background(), inProgressBatchRelease) assert.NoError(t, err) // Verify the DaemonSet was updated correctly @@ -929,7 +929,7 @@ func TestFinalize_NilObject(t *testing.T) { rc := controller.(*realController) rc.object = nil // Set object to nil - err := rc.Finalize(batchReleaseDemo) + err := rc.Finalize(context.Background(), batchReleaseDemo) assert.NoError(t, err) // Should 
return without error } @@ -953,7 +953,7 @@ func TestFinalize_PatchError(t *testing.T) { rc.object = daemon rc.WorkloadInfo = util.ParseWorkload(daemon) - err := rc.Finalize(batchReleaseDemo) + err := rc.Finalize(context.Background(), batchReleaseDemo) assert.Error(t, err) // Should fail because daemon doesn't exist in client } diff --git a/pkg/controller/batchrelease/control/partitionstyle/statefulset/control.go b/pkg/controller/batchrelease/control/partitionstyle/statefulset/control.go index 405dd36a..33241582 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/statefulset/control.go +++ b/pkg/controller/batchrelease/control/partitionstyle/statefulset/control.go @@ -100,7 +100,7 @@ func (rc *realController) ListOwnedPods() ([]*corev1.Pod, error) { return rc.pods, err } -func (rc *realController) Initialize(release *v1beta1.BatchRelease) error { +func (rc *realController) Initialize(ctx context.Context, release *v1beta1.BatchRelease) error { if control.IsControlledByBatchRelease(release, rc.object) { return nil } @@ -111,12 +111,12 @@ func (rc *realController) Initialize(release *v1beta1.BatchRelease) error { body := fmt.Sprintf(`{%s,%s}`, metaBody, specBody) clone := util.GetEmptyObjectWithKey(rc.object) - return rc.client.Patch(context.TODO(), clone, client.RawPatch(types.MergePatchType, []byte(body))) + return rc.client.Patch(ctx, clone, client.RawPatch(types.MergePatchType, []byte(body))) } -func (rc *realController) UpgradeBatch(ctx *batchcontext.BatchContext) error { - desired := ctx.DesiredPartition.IntVal - current := ctx.CurrentPartition.IntVal +func (rc *realController) UpgradeBatch(ctx context.Context, batchContext *batchcontext.BatchContext) error { + desired := batchContext.DesiredPartition.IntVal + current := batchContext.CurrentPartition.IntVal // current less than desired, which means current revision replicas will be less than desired, // in other word, update revision replicas will be more than desired, no need to update again. 
if current <= desired { @@ -126,10 +126,10 @@ func (rc *realController) UpgradeBatch(ctx *batchcontext.BatchContext) error { body := fmt.Sprintf(`{"spec":{"updateStrategy":{"rollingUpdate":{"partition":%d}}}}`, desired) clone := rc.object.DeepCopyObject().(client.Object) - return rc.client.Patch(context.TODO(), clone, client.RawPatch(types.MergePatchType, []byte(body))) + return rc.client.Patch(ctx, clone, client.RawPatch(types.MergePatchType, []byte(body))) } -func (rc *realController) Finalize(release *v1beta1.BatchRelease) error { +func (rc *realController) Finalize(ctx context.Context, release *v1beta1.BatchRelease) error { if rc.object == nil { return nil } @@ -144,7 +144,7 @@ func (rc *realController) Finalize(release *v1beta1.BatchRelease) error { } body := fmt.Sprintf(`{"metadata":{"annotations":{"%s":null}}%s}`, util.BatchReleaseControlAnnotation, specBody) clone := util.GetEmptyObjectWithKey(rc.object) - return rc.client.Patch(context.TODO(), clone, client.RawPatch(types.MergePatchType, []byte(body))) + return rc.client.Patch(ctx, clone, client.RawPatch(types.MergePatchType, []byte(body))) } func (rc *realController) CalculateBatchContext(release *v1beta1.BatchRelease) (*batchcontext.BatchContext, error) { diff --git a/pkg/controller/batchrelease/control/partitionstyle/statefulset/control_test.go b/pkg/controller/batchrelease/control/partitionstyle/statefulset/control_test.go index b78730e9..20ca7ee3 100644 --- a/pkg/controller/batchrelease/control/partitionstyle/statefulset/control_test.go +++ b/pkg/controller/batchrelease/control/partitionstyle/statefulset/control_test.go @@ -580,7 +580,7 @@ func TestRealController(t *testing.T) { controller, err := c.BuildController() Expect(err).NotTo(HaveOccurred()) - err = controller.Initialize(release) + err = controller.Initialize(context.Background(), release) Expect(err).NotTo(HaveOccurred()) fetch := &kruiseappsv1beta1.StatefulSet{} Expect(cli.Get(context.TODO(), stsKey, fetch)).NotTo(HaveOccurred()) @@ -591,7 
+591,7 @@ func TestRealController(t *testing.T) { for { batchContext, err := controller.CalculateBatchContext(release) Expect(err).NotTo(HaveOccurred()) - err = controller.UpgradeBatch(batchContext) + err = controller.UpgradeBatch(context.Background(), batchContext) // mock fetch = &kruiseappsv1beta1.StatefulSet{} Expect(cli.Get(context.TODO(), stsKey, fetch)).NotTo(HaveOccurred()) @@ -605,11 +605,11 @@ func TestRealController(t *testing.T) { Expect(*fetch.Spec.UpdateStrategy.RollingUpdate.Partition).Should(BeNumerically("==", 9)) // mock - _ = controller.Finalize(release) + _ = controller.Finalize(context.Background(), release) fetch = &kruiseappsv1beta1.StatefulSet{} Expect(cli.Get(context.TODO(), stsKey, fetch)).NotTo(HaveOccurred()) c.object = fetch - err = controller.Finalize(release) + err = controller.Finalize(context.Background(), release) Expect(err).NotTo(HaveOccurred()) fetch = &kruiseappsv1beta1.StatefulSet{} Expect(cli.Get(context.TODO(), stsKey, fetch)).NotTo(HaveOccurred()) @@ -737,7 +737,7 @@ func TestFinalize(t *testing.T) { t.Fatalf("BuildController failed: %s", err.Error()) } cs.featureGateFunc() - err = c.Finalize(br) + err = c.Finalize(context.Background(), br) if err != nil { t.Fatalf("BuildController failed: %s", err.Error()) } diff --git a/pkg/controller/batchrelease/metrics/minready_metrics.go b/pkg/controller/batchrelease/metrics/minready_metrics.go new file mode 100644 index 00000000..6bc85069 --- /dev/null +++ b/pkg/controller/batchrelease/metrics/minready_metrics.go @@ -0,0 +1,114 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "time" + + "github.com/prometheus/client_golang/prometheus" + ctrlmetrics "sigs.k8s.io/controller-runtime/pkg/metrics" + + "github.com/openkruise/rollouts/api/v1beta1" +) + +const ( + BatchResultSuccess = "success" + BatchResultStuck = "stuck" + BatchResultDegraded = "degraded" + + DegradedReasonControllerError = "controller_error" + DegradedReasonFeatureGateDisabled = "feature_gate_disabled" + DegradedReasonGitOpsDrift = "gitops_drift" + DegradedReasonMissingAnnotations = "missing_annotations" + StuckReasonBatchReadyTimeout = "batch_ready_timeout" +) + +var ( + minReadyBatchesTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "rollout_minready_batches_total", + Help: "Total number of MinReadySeconds rollout batches by result.", + }, + []string{"rollout", "namespace", "result"}, + ) + minReadyBatchDurationSeconds = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "rollout_minready_batch_duration_seconds", + Help: "Duration in seconds from MinReadySeconds batch upgrade to readiness.", + Buckets: []float64{5, 15, 30, 60, 180, 600, 1800}, + }, + []string{"rollout", "namespace"}, + ) + minReadyStuckSeconds = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "rollout_minready_stuck_seconds", + Help: "Current MinReadySeconds stuck duration in seconds by reason.", + }, + []string{"rollout", "namespace", "reason"}, + ) + minReadyDegradedTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "rollout_minready_degraded_total", + Help: "Total number of MinReadySeconds degraded transitions by 
reason.", + }, + []string{"rollout", "namespace", "reason"}, + ) +) + +func init() { + ctrlmetrics.Registry.MustRegister( + minReadyBatchesTotal, + minReadyBatchDurationSeconds, + minReadyStuckSeconds, + minReadyDegradedTotal, + ) +} + +func RecordMinReadyBatch(release *v1beta1.BatchRelease, result string) { + if release == nil { + return + } + minReadyBatchesTotal.WithLabelValues(release.Name, release.Namespace, result).Inc() +} + +func ObserveMinReadyBatchDuration(release *v1beta1.BatchRelease, duration time.Duration) { + if release == nil { + return + } + minReadyBatchDurationSeconds.WithLabelValues(release.Name, release.Namespace).Observe(duration.Seconds()) +} + +func SetMinReadyStuckSeconds(release *v1beta1.BatchRelease, reason string, seconds float64) { + if release == nil { + return + } + minReadyStuckSeconds.WithLabelValues(release.Name, release.Namespace, reason).Set(seconds) +} + +func ClearMinReadyStuckSeconds(release *v1beta1.BatchRelease, reason string) { + if release == nil { + return + } + minReadyStuckSeconds.WithLabelValues(release.Name, release.Namespace, reason).Set(0) +} + +func RecordMinReadyDegraded(release *v1beta1.BatchRelease, reason string) { + if release == nil { + return + } + minReadyDegradedTotal.WithLabelValues(release.Name, release.Namespace, reason).Inc() +} diff --git a/pkg/controller/batchrelease/metrics/minready_metrics_test.go b/pkg/controller/batchrelease/metrics/minready_metrics_test.go new file mode 100644 index 00000000..c0beac37 --- /dev/null +++ b/pkg/controller/batchrelease/metrics/minready_metrics_test.go @@ -0,0 +1,85 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "testing" + "time" + + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/openkruise/rollouts/api/v1beta1" +) + +func TestMinReadyMetricsRecorders(t *testing.T) { + release := &v1beta1.BatchRelease{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rollout-a", + Namespace: "default", + }, + } + + RecordMinReadyBatch(release, BatchResultSuccess) + ObserveMinReadyBatchDuration(release, 2*time.Second) + SetMinReadyStuckSeconds(release, StuckReasonBatchReadyTimeout, 3) + ClearMinReadyStuckSeconds(release, StuckReasonBatchReadyTimeout) + RecordMinReadyDegraded(release, DegradedReasonControllerError) + + assertCounterPositive(t, minReadyBatchesTotal.WithLabelValues("rollout-a", "default", BatchResultSuccess)) + histogram, ok := minReadyBatchDurationSeconds.WithLabelValues("rollout-a", "default").(prometheus.Metric) + if !ok { + t.Fatalf("histogram observer does not implement prometheus.Metric") + } + assertHistogramCountPositive(t, histogram) + assertGaugeValue(t, minReadyStuckSeconds.WithLabelValues("rollout-a", "default", StuckReasonBatchReadyTimeout), 0) + assertCounterPositive(t, minReadyDegradedTotal.WithLabelValues("rollout-a", "default", DegradedReasonControllerError)) +} + +func assertCounterPositive(t *testing.T, metric interface{ Write(*dto.Metric) error }) { + t.Helper() + var got dto.Metric + if err := metric.Write(&got); err != nil { + t.Fatalf("write metric failed: %v", err) + } + if got.Counter == nil || got.Counter.GetValue() 
<= 0 { + t.Fatalf("counter = %v, want positive", got.Counter) + } +} + +func assertGaugeValue(t *testing.T, metric interface{ Write(*dto.Metric) error }, want float64) { + t.Helper() + var got dto.Metric + if err := metric.Write(&got); err != nil { + t.Fatalf("write metric failed: %v", err) + } + if got.Gauge == nil || got.Gauge.GetValue() != want { + t.Fatalf("gauge = %v, want %v", got.Gauge, want) + } +} + +func assertHistogramCountPositive(t *testing.T, metric interface{ Write(*dto.Metric) error }) { + t.Helper() + var got dto.Metric + if err := metric.Write(&got); err != nil { + t.Fatalf("write metric failed: %v", err) + } + if got.Histogram == nil || got.Histogram.GetSampleCount() == 0 { + t.Fatalf("histogram = %v, want sample count > 0", got.Histogram) + } +} diff --git a/pkg/feature/rollout_features.go b/pkg/feature/rollout_features.go index f8f48f16..fd3a8c10 100644 --- a/pkg/feature/rollout_features.go +++ b/pkg/feature/rollout_features.go @@ -33,6 +33,8 @@ const ( // If the rollout CR is deleted during the rollout process, `pause=false` and `partition=0` will be set, causing the workload to complete deployment. // If `KeepWorkloadPausedOnRolloutDeletion` is set, the state during deployment will be preserved(Keep partition > 0), enabling users to perform rollback operations. KeepWorkloadPausedOnRolloutDeletion featuregate.Feature = "KeepWorkloadPausedOnRolloutDeletion" + // MinReadySecondsStrategy enables the alpha Deployment MinReadySeconds rollout strategy. 
+ MinReadySecondsStrategy featuregate.Feature = "MinReadySecondsStrategy" ) var defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{ @@ -40,6 +42,7 @@ var defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{ AdvancedDeploymentGate: {Default: false, PreRelease: featuregate.Alpha}, AppendServiceSelectorGate: {Default: false, PreRelease: featuregate.Alpha}, KeepWorkloadPausedOnRolloutDeletion: {Default: false, PreRelease: featuregate.Alpha}, + MinReadySecondsStrategy: {Default: false, PreRelease: featuregate.Alpha}, } func init() { diff --git a/pkg/feature/rollout_features_test.go b/pkg/feature/rollout_features_test.go new file mode 100644 index 00000000..55d8e953 --- /dev/null +++ b/pkg/feature/rollout_features_test.go @@ -0,0 +1,29 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package feature + +import ( + "testing" + + utilfeature "github.com/openkruise/rollouts/pkg/util/feature" +) + +func TestMinReadySecondsStrategyDefaultDisabled(t *testing.T) { + if utilfeature.DefaultFeatureGate.Enabled(MinReadySecondsStrategy) { + t.Fatalf("feature gate %s is enabled by default", MinReadySecondsStrategy) + } +} diff --git a/pkg/util/parse_utils.go b/pkg/util/parse_utils.go index 7de786df..b41f8dce 100644 --- a/pkg/util/parse_utils.go +++ b/pkg/util/parse_utils.go @@ -29,6 +29,7 @@ import ( "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/klog/v2" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -395,11 +396,17 @@ func parseStatusIntFromUnstructured(object *unstructured.Unstructured, field str // ParseStatusStringFromUnstructured can parse some fields with string type from unstructured workload object status func parseStatusStringFromUnstructured(object *unstructured.Unstructured, field string) string { - value, found, err := unstructured.NestedFieldNoCopy(object.Object, "status", field) - if err == nil && found { - return value.(string) + // NestedString returns an error (not a panic) when the field exists but is + // not a string, so a malformed status cannot crash the controller. 
+ value, found, err := unstructured.NestedString(object.Object, "status", field) + if err != nil { + klog.Warningf("failed to parse status.%s as string from %v: %v", field, object.GetName(), err) + return "" + } + if !found { + return "" } - return "" + return value } // parseSelectorFromUnstructured can parse labelSelector as selector from unstructured workload object @@ -408,9 +415,14 @@ func parseSelectorFromUnstructured(object *unstructured.Unstructured) (labels.Se if err != nil || !found { return nil, err } - byteInfo, _ := json.Marshal(m) + byteInfo, err := json.Marshal(m) + if err != nil { + return nil, fmt.Errorf("marshal spec.selector of %v failed: %w", object.GetName(), err) + } labelSelector := &metav1.LabelSelector{} - _ = json.Unmarshal(byteInfo, labelSelector) + if err := json.Unmarshal(byteInfo, labelSelector); err != nil { + return nil, fmt.Errorf("unmarshal spec.selector of %v failed: %w", object.GetName(), err) + } return metav1.LabelSelectorAsSelector(labelSelector) } @@ -421,8 +433,15 @@ func parseTemplateFromUnstructured(object *unstructured.Unstructured) *corev1.Po return nil } template := &corev1.PodTemplateSpec{} - templateByte, _ := json.Marshal(t) - _ = json.Unmarshal(templateByte, template) + templateByte, err := json.Marshal(t) + if err != nil { + klog.Warningf("failed to marshal spec.template of %v: %v", object.GetName(), err) + return nil + } + if err := json.Unmarshal(templateByte, template); err != nil { + klog.Warningf("failed to unmarshal spec.template of %v: %v", object.GetName(), err) + return nil + } return template } @@ -432,17 +451,30 @@ func parseMetadataFromUnstructured(object *unstructured.Unstructured) *metav1.Ob if err != nil || !found { return nil } - data, _ := json.Marshal(m) + data, err := json.Marshal(m) + if err != nil { + klog.Warningf("failed to marshal metadata of %v: %v", object.GetName(), err) + return nil + } meta := &metav1.ObjectMeta{} - _ = json.Unmarshal(data, meta) + if err := json.Unmarshal(data, meta); 
err != nil { + klog.Warningf("failed to unmarshal metadata of %v: %v", object.GetName(), err) + return nil + } return meta } // unmarshalIntStr return *intstr.IntOrString func unmarshalIntStr(m interface{}) *intstr.IntOrString { field := &intstr.IntOrString{} - data, _ := json.Marshal(m) - _ = json.Unmarshal(data, field) + data, err := json.Marshal(m) + if err != nil { + klog.Warningf("failed to marshal intOrString value %v: %v", m, err) + return field + } + if err := json.Unmarshal(data, field); err != nil { + klog.Warningf("failed to unmarshal intOrString value %v: %v", m, err) + } return field } diff --git a/pkg/util/parse_utils_test.go b/pkg/util/parse_utils_test.go index 4d522118..eb22e12d 100644 --- a/pkg/util/parse_utils_test.go +++ b/pkg/util/parse_utils_test.go @@ -632,3 +632,54 @@ func TestNativeDaemonSetUnstructuredParse(t *testing.T) { Expect(workloadInfo.Status.ObservedGeneration).Should(BeNumerically("==", ds.Status.ObservedGeneration)) }) } + +// TestParseStatusStringFromUnstructuredMalformed verifies that a non-string +// status field no longer panics (it used to do an unchecked type assertion) and +// instead degrades to an empty string. Regression test for the parse_utils +// type-assertion panic fixed alongside the MinReadySeconds review. 
+func TestParseStatusStringFromUnstructuredMalformed(t *testing.T) { + cases := map[string]struct { + status interface{} + want string + }{ + "string value": {status: "rev-abc", want: "rev-abc"}, + "int value": {status: int64(42), want: ""}, + "map value": {status: map[string]interface{}{"nested": "x"}, want: ""}, + "bool value": {status: true, want: ""}, + "missing field": {status: nil, want: ""}, + } + for name, cs := range cases { + t.Run(name, func(t *testing.T) { + statusMap := map[string]interface{}{} + if cs.status != nil { + statusMap["updateRevision"] = cs.status + } + obj := &unstructured.Unstructured{Object: map[string]interface{}{ + "metadata": map[string]interface{}{"name": "demo"}, + "status": statusMap, + }} + got := parseStatusStringFromUnstructured(obj, "updateRevision") + if got != cs.want { + t.Fatalf("parseStatusStringFromUnstructured = %q, want %q", got, cs.want) + } + }) + } +} + +// TestParseSelectorFromUnstructuredMalformed verifies a malformed selector +// surfaces an error instead of being silently swallowed into an empty selector. +func TestParseSelectorFromUnstructuredMalformed(t *testing.T) { + // spec.selector whose matchLabels is a string (not a map) fails to unmarshal + // into metav1.LabelSelector. 
+ obj := &unstructured.Unstructured{Object: map[string]interface{}{ + "metadata": map[string]interface{}{"name": "demo"}, + "spec": map[string]interface{}{ + "selector": map[string]interface{}{ + "matchLabels": "not-a-map", + }, + }, + }} + if _, err := parseSelectorFromUnstructured(obj); err == nil { + t.Fatalf("parseSelectorFromUnstructured accepted malformed selector, want error") + } +} diff --git a/pkg/util/patch/patch_utils.go b/pkg/util/patch/patch_utils.go index 25b8da15..e07b603b 100644 --- a/pkg/util/patch/patch_utils.go +++ b/pkg/util/patch/patch_utils.go @@ -214,6 +214,21 @@ func (s *DeploymentPatch) UpdateStrategy(strategy apps.DeploymentStrategy) *Depl return s } +func (s *DeploymentPatch) UpdateRecreateStrategy() *DeploymentPatch { + switch s.PatchType { + case types.StrategicMergePatchType, types.MergePatchType: + if _, ok := s.PatchData["spec"]; !ok { + s.PatchData["spec"] = make(map[string]interface{}) + } + spec := s.PatchData["spec"].(map[string]interface{}) + spec["strategy"] = map[string]interface{}{ + "type": apps.RecreateDeploymentStrategyType, + "rollingUpdate": nil, + } + } + return s +} + func (s *DeploymentPatch) UpdatePaused(paused bool) *DeploymentPatch { switch s.PatchType { case types.StrategicMergePatchType, types.MergePatchType: diff --git a/pkg/util/patch/patch_utils_test.go b/pkg/util/patch/patch_utils_test.go index ae806847..e8bb8faf 100644 --- a/pkg/util/patch/patch_utils_test.go +++ b/pkg/util/patch/patch_utils_test.go @@ -17,11 +17,15 @@ limitations under the License. package patch import ( + "encoding/json" "fmt" "reflect" "testing" + apps "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" "github.com/openkruise/rollouts/pkg/util" ) @@ -43,3 +47,143 @@ func TestCommonPatch(t *testing.T) { t.Fatalf("Not equal: \n%s \n%s", expectedPatchBody, patchReq.String()) } } + +func TestMergePatchHelpers(t *testing.T) { + patchReq := NewMergePatch(). 
+ OverrideFinalizer([]string{"finalizer-a"}). + InsertLabel("label-a", "value-a"). + DeleteLabel("label-b"). + InsertAnnotation("annotation-a", "value-b"). + DeleteAnnotation("annotation-b") + + if patchReq.Type() != types.MergePatchType { + t.Fatalf("Type() = %s, want %s", patchReq.Type(), types.MergePatchType) + } + data, err := patchReq.Data(nil) + if err != nil { + t.Fatalf("Data() error = %v", err) + } + if string(data) != patchReq.String() { + t.Fatalf("Data() = %s, want %s", string(data), patchReq.String()) + } + + var got map[string]interface{} + if err := json.Unmarshal(data, &got); err != nil { + t.Fatalf("patch json is malformed: %v", err) + } + metadata := got["metadata"].(map[string]interface{}) + finalizers := metadata["finalizers"].([]interface{}) + if finalizers[0] != "finalizer-a" { + t.Fatalf("finalizers = %v, want finalizer-a", finalizers) + } + labels := metadata["labels"].(map[string]interface{}) + if labels["label-a"] != "value-a" || labels["label-b"] != nil { + t.Fatalf("labels = %v", labels) + } + annotations := metadata["annotations"].(map[string]interface{}) + if annotations["annotation-a"] != "value-b" || annotations["annotation-b"] != nil { + t.Fatalf("annotations = %v", annotations) + } +} + +func TestDeploymentPatchHelpers(t *testing.T) { + progressDeadlineSeconds := int32(600) + maxSurge := intstr.FromString("25%") + maxUnavailable := intstr.FromInt(1) + strategy := apps.DeploymentStrategy{Type: apps.RollingUpdateDeploymentStrategyType} + + strategyPatch := NewDeploymentPatch().UpdateStrategy(strategy) + var strategyOnly map[string]interface{} + if err := json.Unmarshal([]byte(strategyPatch.String()), &strategyOnly); err != nil { + t.Fatalf("strategy patch json is malformed: %v", err) + } + if strategyOnly["spec"].(map[string]interface{})["strategy"].(map[string]interface{})["type"] != string(apps.RollingUpdateDeploymentStrategyType) { + t.Fatalf("strategy patch = %v", strategyOnly) + } + + patchReq := NewDeploymentPatch(). 
+ UpdatePaused(true). + UpdateMinReadySeconds(30). + UpdateProgressDeadlineSeconds(&progressDeadlineSeconds). + UpdateMaxSurge(&maxSurge). + UpdateMaxUnavailable(&maxUnavailable) + + var got map[string]interface{} + if err := json.Unmarshal([]byte(patchReq.String()), &got); err != nil { + t.Fatalf("patch json is malformed: %v", err) + } + spec := got["spec"].(map[string]interface{}) + if spec["paused"] != true { + t.Fatalf("paused = %v, want true", spec["paused"]) + } + if spec["minReadySeconds"] != float64(30) { + t.Fatalf("minReadySeconds = %v, want 30", spec["minReadySeconds"]) + } + if spec["progressDeadlineSeconds"] != float64(600) { + t.Fatalf("progressDeadlineSeconds = %v, want 600", spec["progressDeadlineSeconds"]) + } + rollingUpdate := spec["strategy"].(map[string]interface{})["rollingUpdate"].(map[string]interface{}) + if rollingUpdate["maxSurge"] != "25%" { + t.Fatalf("maxSurge = %v", rollingUpdate["maxSurge"]) + } + if rollingUpdate["maxUnavailable"] != float64(1) { + t.Fatalf("maxUnavailable = %v", rollingUpdate["maxUnavailable"]) + } +} + +func TestDeploymentPatchUpdateRecreateStrategyClearsRollingUpdate(t *testing.T) { + patchReq := NewDeploymentPatch().UpdateRecreateStrategy() + + var got map[string]interface{} + if err := json.Unmarshal([]byte(patchReq.String()), &got); err != nil { + t.Fatalf("patch json is malformed: %v", err) + } + strategy := got["spec"].(map[string]interface{})["strategy"].(map[string]interface{}) + if strategy["type"] != string(apps.RecreateDeploymentStrategyType) { + t.Fatalf("strategy.type = %v, want Recreate", strategy["type"]) + } + if _, ok := strategy["rollingUpdate"]; !ok { + t.Fatalf("rollingUpdate field missing, want explicit null") + } + if strategy["rollingUpdate"] != nil { + t.Fatalf("rollingUpdate = %v, want nil", strategy["rollingUpdate"]) + } +} + +func TestClonesetPatchHelpers(t *testing.T) { + partition := intstr.FromInt(3) + maxSurge := intstr.FromString("20%") + maxUnavailable := intstr.FromInt(1) + + 
patchReq := NewClonesetPatch(). + UpdateMinReadySeconds(10). + UpdatePaused(true). + UpdatePartiton(&partition). + UpdateMaxSurge(&maxSurge). + UpdateMaxUnavailable(&maxUnavailable) + + if patchReq.Type() != types.MergePatchType { + t.Fatalf("Type() = %s, want %s", patchReq.Type(), types.MergePatchType) + } + var got map[string]interface{} + if err := json.Unmarshal([]byte(patchReq.String()), &got); err != nil { + t.Fatalf("patch json is malformed: %v", err) + } + spec := got["spec"].(map[string]interface{}) + if spec["minReadySeconds"] != float64(10) { + t.Fatalf("minReadySeconds = %v, want 10", spec["minReadySeconds"]) + } + updateStrategy := spec["updateStrategy"].(map[string]interface{}) + if updateStrategy["paused"] != true { + t.Fatalf("paused = %v, want true", updateStrategy["paused"]) + } + if updateStrategy["partition"] != float64(3) { + t.Fatalf("partition = %v", updateStrategy["partition"]) + } + if updateStrategy["maxSurge"] != "20%" { + t.Fatalf("maxSurge = %v", updateStrategy["maxSurge"]) + } + if updateStrategy["maxUnavailable"] != float64(1) { + t.Fatalf("maxUnavailable = %v", updateStrategy["maxUnavailable"]) + } +} diff --git a/pkg/util/pod_utils.go b/pkg/util/pod_utils.go index 93aad6d0..043d6755 100644 --- a/pkg/util/pod_utils.go +++ b/pkg/util/pod_utils.go @@ -112,6 +112,15 @@ func IsCompletedPod(pod *v1.Pod) bool { return pod.Status.Phase == v1.PodFailed || pod.Status.Phase == v1.PodSucceeded } +// IsPodActive returns true if a pod is not in a terminal phase and has not been +// marked for deletion. This mirrors the upstream Kubernetes IsPodActive check in +// pkg/controller/controller_utils.go. 
+func IsPodActive(pod *v1.Pod) bool { + return pod.Status.Phase != v1.PodSucceeded && + pod.Status.Phase != v1.PodFailed && + pod.DeletionTimestamp == nil +} + // ListOwnedPods will list all pods belong to workload, including terminating pods func ListOwnedPods(c client.Client, workload client.Object) ([]*v1.Pod, error) { selector, err := getSelector(workload) diff --git a/pkg/webhook/util/writer/fs.go b/pkg/webhook/util/writer/fs.go index 60962d14..831f1ac2 100644 --- a/pkg/webhook/util/writer/fs.go +++ b/pkg/webhook/util/writer/fs.go @@ -126,8 +126,9 @@ func prepareToWrite(dir string) error { switch { case os.IsNotExist(err): klog.Info("cert directory doesn't exist, creating", "directory", dir) - // TODO: figure out if we can reduce the permission. (Now it's 0777) - err = os.MkdirAll(dir, 0777) + // The directory holds the CA/server private keys, so restrict it to the + // owner (0700) instead of world-accessible 0777. + err = os.MkdirAll(dir, 0700) if err != nil { return fmt.Errorf("can't create dir: %v, err: %s", dir, err.Error()) } @@ -201,31 +202,32 @@ func ensureExist(dir string) error { } func certToProjectionMap(cert *generator.Artifacts) map[string]atomic.FileProjection { - // TODO: figure out if we can reduce the permission. (Now it's 0666) + // Private keys are restricted to the owner (0600); public certificates may + // stay world-readable (0644). 
return map[string]atomic.FileProjection{ CAKeyName: { Data: cert.CAKey, - Mode: 0666, + Mode: 0600, }, CACertName: { Data: cert.CACert, - Mode: 0666, + Mode: 0644, }, ServerCertName: { Data: cert.Cert, - Mode: 0666, + Mode: 0644, }, ServerCertName2: { Data: cert.Cert, - Mode: 0666, + Mode: 0644, }, ServerKeyName: { Data: cert.Key, - Mode: 0666, + Mode: 0600, }, ServerKeyName2: { Data: cert.Key, - Mode: 0666, + Mode: 0600, }, } } diff --git a/pkg/webhook/util/writer/fs_test.go b/pkg/webhook/util/writer/fs_test.go new file mode 100644 index 00000000..c48b50a8 --- /dev/null +++ b/pkg/webhook/util/writer/fs_test.go @@ -0,0 +1,77 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package writer + +import ( + "os" + "path" + "path/filepath" + "testing" + + "github.com/openkruise/rollouts/pkg/webhook/util/generator" +) + +// TestWriteCertsToDirPermissions asserts the cert directory and the files it +// holds get restrictive permissions: the directory is owner-only (0700), the +// CA/server private keys are 0600, and the public certificates may stay 0644. +// This locks down the fix for the over-permissive 0777 dir / 0666 key writes. 
+func TestWriteCertsToDirPermissions(t *testing.T) { + base := t.TempDir() + dir := filepath.Join(base, "certs") + + certs := &generator.Artifacts{ + CAKey: []byte("ca-key"), + CACert: []byte("ca-cert"), + Key: []byte("server-key"), + Cert: []byte("server-cert"), + } + + if err := WriteCertsToDir(dir, certs); err != nil { + t.Fatalf("WriteCertsToDir failed: %v", err) + } + + info, err := os.Stat(dir) + if err != nil { + t.Fatalf("stat dir failed: %v", err) + } + if perm := info.Mode().Perm(); perm != 0700 { + t.Fatalf("cert dir perm = %#o, want 0700", perm) + } + + privateKeys := []string{CAKeyName, ServerKeyName, ServerKeyName2} + for _, name := range privateKeys { + assertFilePerm(t, dir, name, 0600) + } + + publicCerts := []string{CACertName, ServerCertName, ServerCertName2} + for _, name := range publicCerts { + assertFilePerm(t, dir, name, 0644) + } +} + +func assertFilePerm(t *testing.T, dir, name string, want os.FileMode) { + t.Helper() + // The atomic writer exposes the payload through a symlink to a timestamped + // directory; Stat (not Lstat) follows it to the real file we care about. + info, err := os.Stat(path.Join(dir, name)) + if err != nil { + t.Fatalf("stat %s failed: %v", name, err) + } + if perm := info.Mode().Perm(); perm != want { + t.Fatalf("%s perm = %#o, want %#o", name, perm, want) + } +} diff --git a/pkg/webhook/workload/mutating/minready_deployment.go b/pkg/webhook/workload/mutating/minready_deployment.go new file mode 100644 index 00000000..0fb58eae --- /dev/null +++ b/pkg/webhook/workload/mutating/minready_deployment.go @@ -0,0 +1,216 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package mutating + +import ( + "fmt" + "strconv" + "strings" + + apps "k8s.io/api/apps/v1" + "k8s.io/apimachinery/pkg/util/intstr" + + appsv1beta1 "github.com/openkruise/rollouts/api/v1beta1" +) + +const ( + inflatedMinReadySeconds int32 = appsv1beta1.MaxReadySeconds + inflatedProgressDeadlineSeconds int32 = appsv1beta1.MaxProgressSeconds +) + +// enrollMinReadyDeployment snapshots the original strategy fields into +// annotations and inflates them in place. It lives in the webhook package so +// admission code does not depend on controller internals. +func enrollMinReadyDeployment(deployment *apps.Deployment) error { + if err := validateMinReadyDeploymentStrategyType(deployment); err != nil { + return err + } + snapshot := deployment.DeepCopy() + if err := enrollMinReadyOriginalAnnotations(snapshot, deployment); err != nil { + return err + } + inflateMinReadyDeploymentStrategy(deployment) + return nil +} + +func enrollMinReadyOriginalAnnotations(snapshot, target *apps.Deployment) error { + if !appsv1beta1.HasMinReadyOriginalAnnotations(snapshot.Annotations) { + writeMinReadyOriginalAnnotations(snapshot, target) + return nil + } + if err := ensureMinReadyOriginalAnnotations(snapshot); err != nil { + return err + } + if err := validateMinReadyInflatedDeploymentStrategy(snapshot); err != nil { + if !hasMinReadyOriginalAvailabilityChange(snapshot) { + return err + } + if err := validateMinReadyRefreshableDeployment(snapshot); err != nil { + return err + } + writeMinReadyOriginalAvailabilityAnnotations(snapshot, target) + } + return nil +} + +func 
writeMinReadyOriginalAnnotations(original, modified *apps.Deployment) { + writeMinReadyOriginalAvailabilityAnnotations(original, modified) + modified.Annotations[appsv1beta1.MinReadyOriginalMaxUnavailableAnnotation] = + serializeMinReadyOriginalIntOrString(originalMinReadyMaxUnavailable(original)) +} + +func writeMinReadyOriginalAvailabilityAnnotations(original, modified *apps.Deployment) { + if modified.Annotations == nil { + modified.Annotations = map[string]string{} + } + modified.Annotations[appsv1beta1.MinReadyOriginalMinReadySecondsAnnotation] = + serializeMinReadyOriginalInt32(&original.Spec.MinReadySeconds, 0) + modified.Annotations[appsv1beta1.MinReadyOriginalProgressDeadlineSecondsAnnotation] = + serializeMinReadyOriginalInt32(original.Spec.ProgressDeadlineSeconds, appsv1beta1.MinReadyDefaultProgressDeadlineSeconds) +} + +func serializeMinReadyOriginalInt32(value *int32, defaultValue int32) string { + if value == nil { + return strconv.FormatInt(int64(defaultValue), 10) + } + return strconv.FormatInt(int64(*value), 10) +} + +func serializeMinReadyOriginalIntOrString(value *intstr.IntOrString) string { + if value == nil { + return appsv1beta1.MinReadyDefaultMaxUnavailable + } + if value.Type == intstr.String { + return value.StrVal + } + return strconv.FormatInt(int64(value.IntVal), 10) +} + +func originalMinReadyMaxUnavailable(deployment *apps.Deployment) *intstr.IntOrString { + if deployment.Spec.Strategy.RollingUpdate == nil { + return nil + } + return deployment.Spec.Strategy.RollingUpdate.MaxUnavailable +} + +func ensureMinReadyOriginalAnnotations(deployment *apps.Deployment) error { + if _, err := parseMinReadyOriginalInt32(deployment.Annotations, appsv1beta1.MinReadyOriginalMinReadySecondsAnnotation); err != nil { + return err + } + if _, err := parseMinReadyOriginalInt32(deployment.Annotations, appsv1beta1.MinReadyOriginalProgressDeadlineSecondsAnnotation); err != nil { + return err + } + if _, err := 
parseMinReadyOriginalIntOrString(deployment.Annotations, appsv1beta1.MinReadyOriginalMaxUnavailableAnnotation); err != nil { + return err + } + return nil +} + +func parseMinReadyOriginalInt32(annotations map[string]string, key string) (*int32, error) { + raw, ok := annotations[key] + if !ok { + return nil, fmt.Errorf("annotation %s missing", key) + } + if raw == "" { + return nil, fmt.Errorf("annotation %s present but empty", key) + } + n, err := strconv.ParseInt(raw, 10, 32) + if err != nil { + return nil, fmt.Errorf("annotation %s malformed int32: %v", key, err) + } + v := int32(n) + return &v, nil +} + +func parseMinReadyOriginalIntOrString(annotations map[string]string, key string) (*intstr.IntOrString, error) { + raw, ok := annotations[key] + if !ok { + return nil, fmt.Errorf("annotation %s missing", key) + } + if raw == "" { + return nil, fmt.Errorf("annotation %s present but empty", key) + } + if strings.HasSuffix(raw, "%") { + if _, err := strconv.Atoi(strings.TrimSuffix(raw, "%")); err != nil { + return nil, fmt.Errorf("annotation %s malformed percent: %v", key, err) + } + v := intstr.FromString(raw) + return &v, nil + } + n, err := strconv.Atoi(raw) + if err != nil { + return nil, fmt.Errorf("annotation %s malformed int: %v", key, err) + } + v := intstr.FromInt(n) + return &v, nil +} + +func inflateMinReadyDeploymentStrategy(deployment *apps.Deployment) { + progressDeadlineSeconds := inflatedProgressDeadlineSeconds + maxUnavailable := intstr.FromInt(0) + deployment.Spec.Paused = false + deployment.Spec.MinReadySeconds = inflatedMinReadySeconds + deployment.Spec.ProgressDeadlineSeconds = &progressDeadlineSeconds + if deployment.Spec.Strategy.RollingUpdate == nil { + deployment.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{} + } + deployment.Spec.Strategy.RollingUpdate.MaxUnavailable = &maxUnavailable +} + +func validateMinReadyInflatedDeploymentStrategy(deployment *apps.Deployment) error { + if err := 
validateMinReadyDeploymentStrategyType(deployment); err != nil { + return err + } + if deployment.Spec.Paused { + return fmt.Errorf("deployment is paused") + } + if deployment.Spec.MinReadySeconds != inflatedMinReadySeconds { + return fmt.Errorf("minReadySeconds=%d want %d", deployment.Spec.MinReadySeconds, inflatedMinReadySeconds) + } + if deployment.Spec.ProgressDeadlineSeconds == nil || *deployment.Spec.ProgressDeadlineSeconds != inflatedProgressDeadlineSeconds { + return fmt.Errorf("progressDeadlineSeconds=%v want %d", deployment.Spec.ProgressDeadlineSeconds, inflatedProgressDeadlineSeconds) + } + if deployment.Spec.Strategy.RollingUpdate == nil { + return fmt.Errorf("rollingUpdate is nil") + } + return nil +} + +func hasMinReadyOriginalAvailabilityChange(deployment *apps.Deployment) bool { + if deployment.Spec.MinReadySeconds != inflatedMinReadySeconds { + return true + } + return deployment.Spec.ProgressDeadlineSeconds == nil || + *deployment.Spec.ProgressDeadlineSeconds != inflatedProgressDeadlineSeconds +} + +func validateMinReadyRefreshableDeployment(deployment *apps.Deployment) error { + if deployment.Spec.Paused { + return fmt.Errorf("deployment is paused") + } + if deployment.Spec.Strategy.RollingUpdate == nil { + return fmt.Errorf("rollingUpdate is nil") + } + return nil +} + +func validateMinReadyDeploymentStrategyType(deployment *apps.Deployment) error { + if deployment.Spec.Strategy.Type != apps.RollingUpdateDeploymentStrategyType { + return fmt.Errorf("deployment strategy type %s is not RollingUpdate", deployment.Spec.Strategy.Type) + } + return nil +} diff --git a/pkg/webhook/workload/mutating/minready_deployment_test.go b/pkg/webhook/workload/mutating/minready_deployment_test.go new file mode 100644 index 00000000..1c8db757 --- /dev/null +++ b/pkg/webhook/workload/mutating/minready_deployment_test.go @@ -0,0 +1,135 @@ +/* +Copyright 2026 The Kruise Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package mutating + +import ( + "testing" + + apps "k8s.io/api/apps/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/utils/pointer" + + appsv1beta1 "github.com/openkruise/rollouts/api/v1beta1" +) + +func TestEnrollMinReadyDeploymentSnapshotsAndInflates(t *testing.T) { + deployment := newWebhookMinReadyDeployment() + + if err := enrollMinReadyDeployment(deployment); err != nil { + t.Fatalf("enrollMinReadyDeployment failed: %v", err) + } + + assertWebhookMinReadyAnnotation(t, deployment, appsv1beta1.MinReadyOriginalMinReadySecondsAnnotation, "7") + assertWebhookMinReadyAnnotation(t, deployment, appsv1beta1.MinReadyOriginalProgressDeadlineSecondsAnnotation, "60") + assertWebhookMinReadyAnnotation(t, deployment, appsv1beta1.MinReadyOriginalMaxUnavailableAnnotation, "25%") + assertWebhookMinReadyInflated(t, deployment) +} + +func TestEnrollMinReadyDeploymentValidatesExistingAnnotations(t *testing.T) { + deployment := newWebhookInflatedMinReadyDeployment() + addWebhookMinReadyOriginalAnnotations(deployment) + original := deployment.Annotations[appsv1beta1.MinReadyOriginalMinReadySecondsAnnotation] + + if err := enrollMinReadyDeployment(deployment); err != nil { + t.Fatalf("enrollMinReadyDeployment failed: %v", err) + } + + if deployment.Annotations[appsv1beta1.MinReadyOriginalMinReadySecondsAnnotation] != original { + t.Fatalf("original annotation was rewritten: %q -> %q", original, 
deployment.Annotations[appsv1beta1.MinReadyOriginalMinReadySecondsAnnotation]) + } +} + +func TestEnrollMinReadyDeploymentRefreshesAvailabilityAnnotationsForContinuousRelease(t *testing.T) { + deployment := newWebhookInflatedMinReadyDeployment() + addWebhookMinReadyOriginalAnnotations(deployment) + deployment.Spec.MinReadySeconds = 9 + deployment.Spec.ProgressDeadlineSeconds = pointer.Int32(90) + + if err := enrollMinReadyDeployment(deployment); err != nil { + t.Fatalf("enrollMinReadyDeployment failed: %v", err) + } + + assertWebhookMinReadyAnnotation(t, deployment, appsv1beta1.MinReadyOriginalMinReadySecondsAnnotation, "9") + assertWebhookMinReadyAnnotation(t, deployment, appsv1beta1.MinReadyOriginalProgressDeadlineSecondsAnnotation, "90") + assertWebhookMinReadyAnnotation(t, deployment, appsv1beta1.MinReadyOriginalMaxUnavailableAnnotation, "25%") + assertWebhookMinReadyInflated(t, deployment) +} + +func TestEnrollMinReadyDeploymentRejectsRecreate(t *testing.T) { + deployment := newWebhookMinReadyDeployment() + deployment.Spec.Strategy.Type = apps.RecreateDeploymentStrategyType + + if err := enrollMinReadyDeployment(deployment); err == nil { + t.Fatalf("enrollMinReadyDeployment accepted Recreate strategy, want error") + } +} + +func newWebhookMinReadyDeployment() *apps.Deployment { + progressDeadline := int32(60) + maxUnavailable := intstr.FromString("25%") + maxSurge := intstr.FromInt(1) + return &apps.Deployment{ + ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{}}, + Spec: apps.DeploymentSpec{ + MinReadySeconds: 7, + ProgressDeadlineSeconds: &progressDeadline, + Strategy: apps.DeploymentStrategy{ + Type: apps.RollingUpdateDeploymentStrategyType, + RollingUpdate: &apps.RollingUpdateDeployment{ + MaxUnavailable: &maxUnavailable, + MaxSurge: &maxSurge, + }, + }, + }, + } +} + +func newWebhookInflatedMinReadyDeployment() *apps.Deployment { + deployment := newWebhookMinReadyDeployment() + inflateMinReadyDeploymentStrategy(deployment) + return deployment 
+} + +func addWebhookMinReadyOriginalAnnotations(deployment *apps.Deployment) { + if deployment.Annotations == nil { + deployment.Annotations = map[string]string{} + } + deployment.Annotations[appsv1beta1.MinReadyOriginalMinReadySecondsAnnotation] = "7" + deployment.Annotations[appsv1beta1.MinReadyOriginalProgressDeadlineSecondsAnnotation] = "60" + deployment.Annotations[appsv1beta1.MinReadyOriginalMaxUnavailableAnnotation] = "25%" +} + +func assertWebhookMinReadyAnnotation(t *testing.T, deployment *apps.Deployment, key, want string) { + t.Helper() + if got := deployment.Annotations[key]; got != want { + t.Fatalf("annotation %s = %q, want %q", key, got, want) + } +} + +func assertWebhookMinReadyInflated(t *testing.T, deployment *apps.Deployment) { + t.Helper() + if deployment.Spec.MinReadySeconds != inflatedMinReadySeconds { + t.Fatalf("minReadySeconds = %d, want %d", deployment.Spec.MinReadySeconds, inflatedMinReadySeconds) + } + if deployment.Spec.ProgressDeadlineSeconds == nil || *deployment.Spec.ProgressDeadlineSeconds != inflatedProgressDeadlineSeconds { + t.Fatalf("progressDeadlineSeconds = %v, want %d", deployment.Spec.ProgressDeadlineSeconds, inflatedProgressDeadlineSeconds) + } + if unavailable := deployment.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.IntVal != 0 { + t.Fatalf("maxUnavailable = %v, want 0", unavailable) + } +} diff --git a/pkg/webhook/workload/mutating/workload_update_handler.go b/pkg/webhook/workload/mutating/workload_update_handler.go index 5726a384..a1bc2b5c 100644 --- a/pkg/webhook/workload/mutating/workload_update_handler.go +++ b/pkg/webhook/workload/mutating/workload_update_handler.go @@ -41,8 +41,10 @@ import ( appsv1alpha1 "github.com/openkruise/rollouts/api/v1alpha1" appsv1beta1 "github.com/openkruise/rollouts/api/v1beta1" + "github.com/openkruise/rollouts/pkg/feature" "github.com/openkruise/rollouts/pkg/util" utilclient "github.com/openkruise/rollouts/pkg/util/client" + utilfeature 
"github.com/openkruise/rollouts/pkg/util/feature" util2 "github.com/openkruise/rollouts/pkg/webhook/util" "github.com/openkruise/rollouts/pkg/webhook/util/configuration" ) @@ -238,6 +240,16 @@ func (h *WorkloadHandler) handleDeployment(newObj, oldObj *apps.Deployment) (boo // in rollout progressing if newObj.Annotations[util.InRolloutProgressingAnnotation] != "" { modified := false + if isMinReadySecondsStrategy(rollout, newObj) { + if isEffectiveDeploymentRevisionChange(oldObj, newObj) { + if err := enrollMinReadyDeployment(newObj); err != nil { + klog.Warningf("Skip MinReady continuous enrollment for Deployment(%s/%s): %v", newObj.Namespace, newObj.Name, err) + return enforceMinReadyInflation(newObj), nil + } + return true, nil + } + return enforceMinReadyInflation(newObj), nil + } strategy := util.GetDeploymentStrategy(newObj) // partition if strings.EqualFold(string(strategy.RollingStyle), string(appsv1alpha1.PartitionRollingStyle)) { @@ -324,8 +336,22 @@ func (h *WorkloadHandler) handleDeployment(newObj, oldObj *apps.Deployment) (boo newObj.Labels[appsv1alpha1.DeploymentStableRevisionLabel] = stableRS.Labels[apps.DefaultDeploymentUniqueLabelKey] } - // need set workload paused = true - newObj.Spec.Paused = true + if isMinReadySecondsStrategy(rollout, newObj) { + // MinReady keeps the native controller running, so it must NOT be paused. + // Inflate the strategy synchronously at admission time: this snapshots the + // original fields into annotations and sets minReadySeconds/maxUnavailable + // so the native controller never observes the user's original budget in the + // window between admission and MinReadyControl.Initialize. Continuous + // releases refresh user-owned availability annotations before re-inflation. + if err := enrollMinReadyDeployment(newObj); err != nil { + // Do not block admission; the controller's Initialize will surface a + // degraded condition for an unsupported strategy instead. 
+ klog.Warningf("Skip MinReady enrollment for Deployment(%s/%s): %v", newObj.Namespace, newObj.Name, err) + } + } else { + // Partition/Recreate style disables the native Deployment controller. + newObj.Spec.Paused = true + } state := &util.RolloutState{RolloutName: rollout.Name} by, _ := json.Marshal(state) if newObj.Annotations == nil { @@ -451,6 +477,60 @@ func isEffectiveDeploymentRevisionChange(oldObj, newObj *apps.Deployment) bool { return true } +// isMinReadySecondsStrategy reports whether the Deployment should be driven by +// the MinReadySeconds strategy (keep RollingUpdate, do not pause) instead of +// the legacy Recreate-style mutation. +// +// It only checks Canary because a Rollout cannot declare BlueGreen and Canary at +// the same time: the validating webhook rejects that combination +// (pkg/webhook/rollout/validating/rollout_create_update_handler.go, +// "Canary and BlueGreen cannot both be set"). When the feature gate is disabled +// mid-rollout, the DeploymentStrategyAnnotation keeps this symmetric with the +// executor's MinReady annotation fallback. +func isMinReadySecondsStrategy(rollout *appsv1beta1.Rollout, deployment *apps.Deployment) bool { + if rollout.Spec.Strategy.Canary == nil || rollout.Spec.Strategy.Canary.EnableExtraWorkloadForCanary { + return false + } + if utilfeature.DefaultFeatureGate.Enabled(feature.MinReadySecondsStrategy) { + return true + } + strategy := util.GetDeploymentStrategy(deployment) + return strings.EqualFold(string(strategy.RollingStyle), string(appsv1alpha1.PartitionRollingStyle)) +} + +func enforceMinReadyInflation(deployment *apps.Deployment) bool { + if !appsv1beta1.HasMinReadyOriginalAnnotations(deployment.Annotations) { + return false + } + modified := false + // The MinReady strategy relies on the native RollingUpdate controller staying + // active and driven by inflated fields. 
Re-assert the core invariants here so a + // GitOps/manual write of Recreate or paused=true is rejected at admission time + // rather than only surfacing as a controller-side degraded condition later. + if deployment.Spec.Strategy.Type != apps.RollingUpdateDeploymentStrategyType { + deployment.Spec.Strategy.Type = apps.RollingUpdateDeploymentStrategyType + modified = true + } + if deployment.Spec.Paused { + deployment.Spec.Paused = false + modified = true + } + if deployment.Spec.Strategy.RollingUpdate == nil { + deployment.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{} + modified = true + } + if deployment.Spec.MinReadySeconds != inflatedMinReadySeconds { + deployment.Spec.MinReadySeconds = inflatedMinReadySeconds + modified = true + } + if deployment.Spec.ProgressDeadlineSeconds == nil || *deployment.Spec.ProgressDeadlineSeconds != inflatedProgressDeadlineSeconds { + progressDeadlineSeconds := inflatedProgressDeadlineSeconds + deployment.Spec.ProgressDeadlineSeconds = &progressDeadlineSeconds + modified = true + } + return modified +} + func setDeploymentStrategyAnnotation(strategy appsv1alpha1.DeploymentStrategy, d *apps.Deployment) { strategyAnno, _ := json.Marshal(&strategy) d.Annotations[appsv1alpha1.DeploymentStrategyAnnotation] = string(strategyAnno) diff --git a/pkg/webhook/workload/mutating/workload_update_handler_test.go b/pkg/webhook/workload/mutating/workload_update_handler_test.go index 8d7a0732..db20a52b 100644 --- a/pkg/webhook/workload/mutating/workload_update_handler_test.go +++ b/pkg/webhook/workload/mutating/workload_update_handler_test.go @@ -44,7 +44,9 @@ import ( rolloutapi "github.com/openkruise/rollouts/api" appsv1alpha1 "github.com/openkruise/rollouts/api/v1alpha1" appsv1beta1 "github.com/openkruise/rollouts/api/v1beta1" + "github.com/openkruise/rollouts/pkg/feature" "github.com/openkruise/rollouts/pkg/util" + utilfeature "github.com/openkruise/rollouts/pkg/util/feature" 
"github.com/openkruise/rollouts/pkg/webhook/util/configuration" ) @@ -419,6 +421,44 @@ func TestHandlerDeployment(t *testing.T) { return rolloutDemo.DeepCopy() }, }, + { + name: "deployment image v1->v2, matched minready rollout inflates strategy at admission and stays unpaused", + getObjs: func() (*apps.Deployment, *apps.Deployment) { + oldObj := deploymentDemo.DeepCopy() + oldObj.Spec.Strategy.Type = apps.RollingUpdateDeploymentStrategyType + newObj := deploymentDemo.DeepCopy() + newObj.Spec.Strategy.Type = apps.RollingUpdateDeploymentStrategyType + newObj.Spec.Template.Spec.Containers[0].Image = "echoserver:v2" + return oldObj, newObj + }, + expectObj: func() *apps.Deployment { + obj := deploymentDemo.DeepCopy() + obj.Spec.Strategy.Type = apps.RollingUpdateDeploymentStrategyType + obj.Spec.Template.Spec.Containers[0].Image = "echoserver:v2" + obj.Annotations[util.InRolloutProgressingAnnotation] = `{"rolloutName":"rollout-demo"}` + // P1-6: enrollment snapshots original strategy fields and inflates + // minReadySeconds/progressDeadline/maxUnavailable synchronously so the + // native controller never observes the original budget before Initialize. 
+ obj.Annotations[appsv1beta1.MinReadyOriginalMinReadySecondsAnnotation] = "0" + obj.Annotations[appsv1beta1.MinReadyOriginalProgressDeadlineSecondsAnnotation] = "600" + obj.Annotations[appsv1beta1.MinReadyOriginalMaxUnavailableAnnotation] = "25%" + obj.Spec.Paused = false + obj.Spec.MinReadySeconds = inflatedMinReadySeconds + pds := inflatedProgressDeadlineSeconds + obj.Spec.ProgressDeadlineSeconds = &pds + maxUnavailable := intstr.FromInt(0) + obj.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{MaxUnavailable: &maxUnavailable} + return obj + }, + getRs: func() []*apps.ReplicaSet { + rs := rsDemo.DeepCopy() + return []*apps.ReplicaSet{rs} + }, + getRollout: func() *appsv1beta1.Rollout { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + return rolloutDemo.DeepCopy() + }, + }, { name: "deployment image v1->v2, no matched rollout", getObjs: func() (*apps.Deployment, *apps.Deployment) { @@ -541,6 +581,118 @@ func TestHandlerDeployment(t *testing.T) { return rolloutDemo.DeepCopy() }, }, + { + name: "minready deployment in progressing skips recreate mutation", + getObjs: func() (*apps.Deployment, *apps.Deployment) { + oldObj := deploymentDemo.DeepCopy() + oldObj.Spec.Template.Spec.Containers[0].Image = "echoserver:v2" + oldObj.Annotations[util.InRolloutProgressingAnnotation] = `{"rolloutName":"rollout-demo","RolloutDone":false}` + newObj := oldObj.DeepCopy() + newObj.Spec.Paused = false + newObj.Spec.Strategy.Type = apps.RollingUpdateDeploymentStrategyType + newObj.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{ + MaxUnavailable: &intstr.IntOrString{Type: intstr.Int, IntVal: 2}, + MaxSurge: &intstr.IntOrString{Type: intstr.Int, IntVal: 0}, + } + newObj.Annotations[appsv1alpha1.DeploymentStrategyAnnotation] = `{"rollingStyle":"Partition"}` + return oldObj, newObj + }, + expectObj: func() *apps.Deployment { + obj := deploymentDemo.DeepCopy() + obj.Spec.Template.Spec.Containers[0].Image = 
"echoserver:v2" + obj.Annotations[util.InRolloutProgressingAnnotation] = `{"rolloutName":"rollout-demo","RolloutDone":false}` + obj.Spec.Paused = false + obj.Spec.Strategy.Type = apps.RollingUpdateDeploymentStrategyType + obj.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{ + MaxUnavailable: &intstr.IntOrString{Type: intstr.Int, IntVal: 2}, + MaxSurge: &intstr.IntOrString{Type: intstr.Int, IntVal: 0}, + } + obj.Annotations[appsv1alpha1.DeploymentStrategyAnnotation] = `{"rollingStyle":"Partition"}` + return obj + }, + getRs: func() []*apps.ReplicaSet { + rs := rsDemo.DeepCopy() + return []*apps.ReplicaSet{rs} + }, + getRollout: func() *appsv1beta1.Rollout { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + return rolloutDemo.DeepCopy() + }, + }, + { + name: "minready deployment in progressing without strategy annotation keeps deployment unpaused", + getObjs: func() (*apps.Deployment, *apps.Deployment) { + oldObj := deploymentDemo.DeepCopy() + oldObj.Spec.Template.Spec.Containers[0].Image = "echoserver:v2" + oldObj.Annotations[util.InRolloutProgressingAnnotation] = `{"rolloutName":"rollout-demo","RolloutDone":false}` + newObj := oldObj.DeepCopy() + newObj.Spec.Paused = false + return oldObj, newObj + }, + expectObj: func() *apps.Deployment { + obj := deploymentDemo.DeepCopy() + obj.Spec.Template.Spec.Containers[0].Image = "echoserver:v2" + obj.Annotations[util.InRolloutProgressingAnnotation] = `{"rolloutName":"rollout-demo","RolloutDone":false}` + obj.Spec.Paused = false + return obj + }, + getRs: func() []*apps.ReplicaSet { + rs := rsDemo.DeepCopy() + return []*apps.ReplicaSet{rs} + }, + getRollout: func() *appsv1beta1.Rollout { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + return rolloutDemo.DeepCopy() + }, + }, + { + name: "minready continuous release refreshes original availability annotations", + getObjs: func() (*apps.Deployment, 
*apps.Deployment) { + oldObj := deploymentDemo.DeepCopy() + oldObj.Spec.Template.Spec.Containers[0].Image = "echoserver:v2" + oldObj.Annotations[util.InRolloutProgressingAnnotation] = `{"rolloutName":"rollout-demo","RolloutDone":false}` + oldObj.Annotations[appsv1beta1.MinReadyOriginalMinReadySecondsAnnotation] = "7" + oldObj.Annotations[appsv1beta1.MinReadyOriginalProgressDeadlineSecondsAnnotation] = "60" + oldObj.Annotations[appsv1beta1.MinReadyOriginalMaxUnavailableAnnotation] = "25%" + oldObj.Spec.Paused = false + oldObj.Spec.Strategy.Type = apps.RollingUpdateDeploymentStrategyType + maxUnavailable := intstr.FromInt(0) + oldObj.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{MaxUnavailable: &maxUnavailable} + oldObj.Spec.MinReadySeconds = inflatedMinReadySeconds + inflatedPDS := inflatedProgressDeadlineSeconds + oldObj.Spec.ProgressDeadlineSeconds = &inflatedPDS + + newObj := oldObj.DeepCopy() + newObj.Spec.Template.Spec.Containers[0].Image = "echoserver:v3" + newObj.Spec.MinReadySeconds = 9 + newObj.Spec.ProgressDeadlineSeconds = pointer.Int32(90) + return oldObj, newObj + }, + expectObj: func() *apps.Deployment { + obj := deploymentDemo.DeepCopy() + obj.Spec.Template.Spec.Containers[0].Image = "echoserver:v3" + obj.Annotations[util.InRolloutProgressingAnnotation] = `{"rolloutName":"rollout-demo","RolloutDone":false}` + obj.Annotations[appsv1beta1.MinReadyOriginalMinReadySecondsAnnotation] = "9" + obj.Annotations[appsv1beta1.MinReadyOriginalProgressDeadlineSecondsAnnotation] = "90" + obj.Annotations[appsv1beta1.MinReadyOriginalMaxUnavailableAnnotation] = "25%" + obj.Spec.Paused = false + obj.Spec.Strategy.Type = apps.RollingUpdateDeploymentStrategyType + maxUnavailable := intstr.FromInt(0) + obj.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{MaxUnavailable: &maxUnavailable} + obj.Spec.MinReadySeconds = inflatedMinReadySeconds + inflatedPDS := inflatedProgressDeadlineSeconds + obj.Spec.ProgressDeadlineSeconds = &inflatedPDS + return obj 
+ }, + getRs: func() []*apps.ReplicaSet { + rs := rsDemo.DeepCopy() + return []*apps.ReplicaSet{rs} + }, + getRollout: func() *appsv1beta1.Rollout { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + return rolloutDemo.DeepCopy() + }, + }, { name: "set deployment paused = false, matched rollout, in finalising, allow", getObjs: func() (*apps.Deployment, *apps.Deployment) { @@ -744,6 +896,7 @@ func TestHandlerDeployment(t *testing.T) { decoder := admission.NewDecoder(scheme) for _, cs := range cases { t.Run(cs.name, func(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=false") client := fake.NewClientBuilder().WithScheme(scheme).Build() h := WorkloadHandler{ Client: client, @@ -778,6 +931,127 @@ func TestHandlerDeployment(t *testing.T) { } } +func TestIsMinReadySecondsStrategy(t *testing.T) { + rollout := rolloutDemo.DeepCopy() + deployment := deploymentDemo.DeepCopy() + if deployment.Annotations == nil { + deployment.Annotations = map[string]string{} + } + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=false") + if isMinReadySecondsStrategy(rollout, deployment) { + t.Fatalf("skip returned true while feature gate is disabled") + } + deployment.Annotations[appsv1alpha1.DeploymentStrategyAnnotation] = `{"rollingStyle":"Partition"}` + if !isMinReadySecondsStrategy(rollout, deployment) { + t.Fatalf("skip returned false for in-progress MinReady Deployment with strategy annotation") + } + delete(deployment.Annotations, appsv1alpha1.DeploymentStrategyAnnotation) + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + if !isMinReadySecondsStrategy(rollout, deployment) { + t.Fatalf("skip returned false for MinReadySeconds with feature gate enabled") + } + rollout.Spec.Strategy.Canary.EnableExtraWorkloadForCanary = true + if isMinReadySecondsStrategy(rollout, deployment) { + 
t.Fatalf("skip returned true for canary-style rollout") + } +} + +// inflatedMinReadyDeployment returns a Deployment in a healthy inflated MinReady +// state: RollingUpdate, unpaused, with original-strategy annotations present. +func inflatedMinReadyDeployment() *apps.Deployment { + pds := inflatedProgressDeadlineSeconds + maxUnavailable := intstr.FromInt(0) + return &apps.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + appsv1beta1.MinReadyOriginalMinReadySecondsAnnotation: "0", + appsv1beta1.MinReadyOriginalProgressDeadlineSecondsAnnotation: "600", + appsv1beta1.MinReadyOriginalMaxUnavailableAnnotation: "25%", + }, + }, + Spec: apps.DeploymentSpec{ + Paused: false, + MinReadySeconds: inflatedMinReadySeconds, + ProgressDeadlineSeconds: &pds, + Strategy: apps.DeploymentStrategy{ + Type: apps.RollingUpdateDeploymentStrategyType, + RollingUpdate: &apps.RollingUpdateDeployment{MaxUnavailable: &maxUnavailable}, + }, + }, + } +} + +// TestEnforceMinReadyInflation covers P0-2: while a MinReady rollout is +// progressing, the webhook must re-assert the core invariants (RollingUpdate, +// unpaused, non-nil rollingUpdate, inflated fields) so a GitOps/manual drift is +// rejected at admission time rather than only surfacing later in the controller. 
+func TestEnforceMinReadyInflation(t *testing.T) { + t.Run("no MinReady annotations leaves object untouched", func(t *testing.T) { + d := &apps.Deployment{Spec: apps.DeploymentSpec{Strategy: apps.DeploymentStrategy{Type: apps.RecreateDeploymentStrategyType}}} + if enforceMinReadyInflation(d) { + t.Fatalf("expected no modification without MinReady annotations") + } + if d.Spec.Strategy.Type != apps.RecreateDeploymentStrategyType { + t.Fatalf("strategy type changed unexpectedly: %s", d.Spec.Strategy.Type) + } + }) + + t.Run("healthy inflated state is not modified", func(t *testing.T) { + d := inflatedMinReadyDeployment() + if enforceMinReadyInflation(d) { + t.Fatalf("expected no modification for an already-inflated healthy deployment") + } + }) + + t.Run("strategy type drift to Recreate is rewritten", func(t *testing.T) { + d := inflatedMinReadyDeployment() + d.Spec.Strategy.Type = apps.RecreateDeploymentStrategyType + if !enforceMinReadyInflation(d) { + t.Fatalf("expected modification for strategy type drift") + } + if d.Spec.Strategy.Type != apps.RollingUpdateDeploymentStrategyType { + t.Fatalf("strategy type not restored to RollingUpdate: %s", d.Spec.Strategy.Type) + } + }) + + t.Run("paused drift is reverted", func(t *testing.T) { + d := inflatedMinReadyDeployment() + d.Spec.Paused = true + if !enforceMinReadyInflation(d) { + t.Fatalf("expected modification for paused drift") + } + if d.Spec.Paused { + t.Fatalf("paused not reverted to false") + } + }) + + t.Run("nil rollingUpdate is restored", func(t *testing.T) { + d := inflatedMinReadyDeployment() + d.Spec.Strategy.RollingUpdate = nil + if !enforceMinReadyInflation(d) { + t.Fatalf("expected modification for nil rollingUpdate") + } + if d.Spec.Strategy.RollingUpdate == nil { + t.Fatalf("rollingUpdate not restored") + } + }) + + t.Run("deflated fields are re-inflated", func(t *testing.T) { + d := inflatedMinReadyDeployment() + d.Spec.MinReadySeconds = 5 + d.Spec.ProgressDeadlineSeconds = pointer.Int32(600) + if 
!enforceMinReadyInflation(d) { + t.Fatalf("expected modification for deflated fields") + } + if d.Spec.MinReadySeconds != inflatedMinReadySeconds { + t.Fatalf("minReadySeconds not re-inflated: %d", d.Spec.MinReadySeconds) + } + if d.Spec.ProgressDeadlineSeconds == nil || *d.Spec.ProgressDeadlineSeconds != inflatedProgressDeadlineSeconds { + t.Fatalf("progressDeadlineSeconds not re-inflated: %v", d.Spec.ProgressDeadlineSeconds) + } + }) +} + func TestHandlerCloneSet(t *testing.T) { cases := []struct { name string diff --git a/test/e2e/minready/deployment_minready_actions_test.go b/test/e2e/minready/deployment_minready_actions_test.go new file mode 100644 index 00000000..48965eac --- /dev/null +++ b/test/e2e/minready/deployment_minready_actions_test.go @@ -0,0 +1,171 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package minready + +import ( + "context" + "strconv" + "strings" + "time" + + . 
"github.com/onsi/gomega" + apps "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/util/retry" + "k8s.io/utils/pointer" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/openkruise/rollouts/api/v1beta1" +) + +func mergeEnvVar(original []corev1.EnvVar, add corev1.EnvVar) []corev1.EnvVar { + newEnvs := make([]corev1.EnvVar, 0, len(original)+1) + for _, env := range original { + if add.Name == env.Name { + continue + } + newEnvs = append(newEnvs, env) + } + return append(newEnvs, add) +} + +func finishMinReadyE2ERollout(namespace, name string) { + completeMinReadyE2ERollout(namespace, name) + waitMinReadyE2EDeploymentRestored(namespace) +} + +func finishMinReadyE2ERolloutWithAvailability(namespace, name string, minReadySeconds, progressDeadlineSeconds int32) { + completeMinReadyE2ERollout(namespace, name) + waitMinReadyE2EDeploymentRestoredWithAvailability(namespace, minReadySeconds, progressDeadlineSeconds) +} + +func waitMinReadyE2EDeploymentReady(namespace string) { + Eventually(func() bool { + deployment := &apps.Deployment{} + key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} + if err := k8sClient.Get(context.TODO(), key, deployment); err != nil { + return false + } + return deployment.Status.ReadyReplicas == *deployment.Spec.Replicas + }, 10*time.Minute, time.Second).Should(BeTrue()) +} + +func waitMinReadyE2ERolloutStepPaused(namespace, name string, step int32) { + Eventually(func() bool { + rollout := &v1beta1.Rollout{} + key := types.NamespacedName{Namespace: namespace, Name: name} + Expect(k8sClient.Get(context.TODO(), key, rollout)).NotTo(HaveOccurred()) + return rollout.Status.CanaryStatus != nil && + rollout.Status.CanaryStatus.CurrentStepIndex == step && + rollout.Status.CanaryStatus.CurrentStepState == v1beta1.CanaryStepStatePaused + }, 10*time.Minute, time.Second).Should(BeTrue()) +} + +func 
patchMinReadyE2EDeploymentReplicas(namespace string, replicas int32) { + Expect(retry.RetryOnConflict(retry.DefaultRetry, func() error { + deployment := &apps.Deployment{} + key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} + if err := k8sClient.Get(context.TODO(), key, deployment); err != nil { + return err + } + deployment.Spec.Replicas = pointer.Int32(replicas) + return k8sClient.Update(context.TODO(), deployment) + })).NotTo(HaveOccurred()) +} + +func updateMinReadyE2EDeploymentContinuousRelease(namespace, version string, minReadySeconds, progressDeadlineSeconds int32) { + Expect(retry.RetryOnConflict(retry.DefaultRetry, func() error { + deployment := &apps.Deployment{} + key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} + if err := k8sClient.Get(context.TODO(), key, deployment); err != nil { + return err + } + deployment.Spec.Template.Spec.Containers[0].Env = mergeEnvVar( + deployment.Spec.Template.Spec.Containers[0].Env, + corev1.EnvVar{Name: "NODE_NAME", Value: version}, + ) + deployment.Spec.MinReadySeconds = minReadySeconds + deployment.Spec.ProgressDeadlineSeconds = pointer.Int32(progressDeadlineSeconds) + return k8sClient.Update(context.TODO(), deployment) + })).NotTo(HaveOccurred()) +} + +func patchMinReadyE2EMaxUnavailable(namespace string, value int) { + Expect(retry.RetryOnConflict(retry.DefaultRetry, func() error { + deployment := &apps.Deployment{} + key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} + if err := k8sClient.Get(context.TODO(), key, deployment); err != nil { + return err + } + maxUnavailable := intstr.FromInt(value) + deployment.Spec.Strategy.RollingUpdate.MaxUnavailable = &maxUnavailable + return k8sClient.Update(context.TODO(), deployment) + })).NotTo(HaveOccurred()) +} + +func deleteMinReadyE2EOriginalAnnotation(namespace, key string) { + Expect(retry.RetryOnConflict(retry.DefaultRetry, func() error { + deployment := &apps.Deployment{} 
+ namespacedName := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} + if err := k8sClient.Get(context.TODO(), namespacedName, deployment); err != nil { + return err + } + delete(deployment.Annotations, key) + return k8sClient.Update(context.TODO(), deployment) + })).NotTo(HaveOccurred()) +} + +func restartMinReadyE2EControllerManager() { + Expect(retry.RetryOnConflict(retry.DefaultRetry, func() error { + deployment := &apps.Deployment{} + key := types.NamespacedName{Namespace: "kruise-rollout", Name: "kruise-rollout-controller-manager"} + if err := k8sClient.Get(context.TODO(), key, deployment); err != nil { + return err + } + if deployment.Spec.Template.Annotations == nil { + deployment.Spec.Template.Annotations = map[string]string{} + } + deployment.Spec.Template.Annotations["rollouts.kruise.io/minready-e2e-restart"] = strconv.FormatInt(time.Now().UnixNano(), 10) + return k8sClient.Update(context.TODO(), deployment) + })).NotTo(HaveOccurred()) + waitMinReadyE2EWebhookEndpointReady() +} + +func expectMinReadyE2EDeploymentVersion(namespace, version string) { + deployment := &apps.Deployment{} + key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} + Expect(k8sClient.Get(context.TODO(), key, deployment)).Should(Succeed()) + got := "" + for _, env := range deployment.Spec.Template.Spec.Containers[0].Env { + if env.Name == "NODE_NAME" { + got = env.Value + } + } + Expect(got).Should(Equal(version)) +} + +func expectMinReadyE2ENoVersion2Pods(namespace string) { + pods := &corev1.PodList{} + Expect(k8sClient.List(context.TODO(), pods, client.InNamespace(namespace))).Should(Succeed()) + for _, pod := range pods.Items { + for _, container := range pod.Spec.Containers { + Expect(strings.Contains(container.Image, "version2")).Should(BeFalse()) + } + } +} diff --git a/test/e2e/minready/deployment_minready_helpers_test.go b/test/e2e/minready/deployment_minready_helpers_test.go new file mode 100644 index 
00000000..8332ba41 --- /dev/null +++ b/test/e2e/minready/deployment_minready_helpers_test.go @@ -0,0 +1,283 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package minready + +import ( + "context" + "fmt" + "time" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" + apps "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + policyv1 "k8s.io/api/policy/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/util/retry" + "k8s.io/utils/pointer" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/openkruise/rollouts/api/v1beta1" + partitiondeployment "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/deployment" +) + +const minReadyE2EDeploymentName = "minready-demo" + +func newMinReadyE2EDeployment(namespace string) *apps.Deployment { + maxUnavailable := intstr.FromString("25%") + maxSurge := intstr.FromInt(1) + return &apps.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: minReadyE2EDeploymentName, + Namespace: namespace, + Labels: map[string]string{"app": minReadyE2EDeploymentName}, + }, + Spec: apps.DeploymentSpec{ + Replicas: pointer.Int32(5), + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": minReadyE2EDeploymentName}}, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"app": 
minReadyE2EDeploymentName}}, + Spec: corev1.PodSpec{Containers: []corev1.Container{{ + Name: "echoserver", + Image: "cilium/echoserver:latest", + ImagePullPolicy: corev1.PullIfNotPresent, + Env: []corev1.EnvVar{{Name: "NODE_NAME", Value: "version1"}}, + }}}, + }, + Strategy: apps.DeploymentStrategy{ + Type: apps.RollingUpdateDeploymentStrategyType, + RollingUpdate: &apps.RollingUpdateDeployment{ + MaxUnavailable: &maxUnavailable, + MaxSurge: &maxSurge, + }, + }, + }, + } +} + +func newMinReadyE2ERollout(namespace string) *v1beta1.Rollout { + return &v1beta1.Rollout{ + ObjectMeta: metav1.ObjectMeta{Name: "minready-rollout", Namespace: namespace}, + Spec: v1beta1.RolloutSpec{ + WorkloadRef: v1beta1.ObjectRef{APIVersion: "apps/v1", Kind: "Deployment", Name: minReadyE2EDeploymentName}, + Strategy: v1beta1.RolloutStrategy{Canary: &v1beta1.CanaryStrategy{ + EnableExtraWorkloadForCanary: false, + Steps: []v1beta1.CanaryStep{ + {Replicas: intstrPtr(intstr.FromString("20%")), Pause: v1beta1.RolloutPause{}}, + {Replicas: intstrPtr(intstr.FromString("50%")), Pause: v1beta1.RolloutPause{}}, + {Replicas: intstrPtr(intstr.FromString("100%")), Pause: v1beta1.RolloutPause{Duration: pointer.Int32(0)}}, + }, + }}, + }, + } +} + +func newMinReadyE2EPDB(namespace string) *policyv1.PodDisruptionBudget { + return &policyv1.PodDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{Name: "minready-pdb", Namespace: namespace}, + Spec: policyv1.PodDisruptionBudgetSpec{ + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": minReadyE2EDeploymentName}}, + MinAvailable: intstrPtr(intstr.FromInt(4)), + }, + } +} + +func intstrPtr(value intstr.IntOrString) *intstr.IntOrString { + return &value +} + +func createMinReadyE2EObject(object client.Object) { + By(fmt.Sprintf("create %T %s/%s", object, object.GetNamespace(), object.GetName())) + Expect(k8sClient.Create(context.TODO(), object)).NotTo(HaveOccurred()) +} + +func createReadyMinReadyE2EDeployment(namespace string, deployment 
*apps.Deployment) { + createMinReadyE2EObject(deployment) + waitMinReadyE2EDeploymentReady(namespace) +} + +func createHealthyMinReadyE2ERollout(namespace string, rollout *v1beta1.Rollout) { + createMinReadyE2EObject(rollout) + waitMinReadyE2ERolloutPhase(namespace, rollout.Name, v1beta1.RolloutPhaseHealthy) +} + +func updateMinReadyE2EDeploymentVersion(namespace, version string) { + Expect(retry.RetryOnConflict(retry.DefaultRetry, func() error { + deployment := &apps.Deployment{} + key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} + if err := k8sClient.Get(context.TODO(), key, deployment); err != nil { + return err + } + deployment.Spec.Template.Spec.Containers[0].Env = mergeEnvVar( + deployment.Spec.Template.Spec.Containers[0].Env, + corev1.EnvVar{Name: "NODE_NAME", Value: version}, + ) + return k8sClient.Update(context.TODO(), deployment) + })).NotTo(HaveOccurred()) +} + +func resumeMinReadyE2ERollout(namespace, name string) { + resumedStep := markMinReadyE2ERolloutPausedStepReady(namespace, name) + if resumedStep < 0 { + return + } + waitMinReadyE2ERolloutStepTransitioned(namespace, name, resumedStep) +} + +func markMinReadyE2ERolloutPausedStepReady(namespace, name string) int32 { + resumedStep := int32(-1) + Eventually(func() bool { + rollout := &v1beta1.Rollout{} + key := types.NamespacedName{Namespace: namespace, Name: name} + Expect(k8sClient.Get(context.TODO(), key, rollout)).NotTo(HaveOccurred()) + if rollout.Status.Phase == v1beta1.RolloutPhaseHealthy { + return true + } + if rollout.Status.CanaryStatus == nil || rollout.Status.CanaryStatus.CurrentStepState != v1beta1.CanaryStepStatePaused { + return false + } + resumedStep = rollout.Status.CanaryStatus.CurrentStepIndex + body := fmt.Sprintf(`{"status":{"canaryStatus":{"currentStepState":"%s"}}}`, v1beta1.CanaryStepStateReady) + return k8sClient.Status().Patch(context.TODO(), rollout, client.RawPatch(types.MergePatchType, []byte(body))) == nil + }, 5*time.Minute, 
time.Second).Should(BeTrue()) + return resumedStep +} + +func waitMinReadyE2ERolloutStepTransitioned(namespace, name string, resumedStep int32) { + Eventually(func() bool { + rollout := &v1beta1.Rollout{} + key := types.NamespacedName{Namespace: namespace, Name: name} + Expect(k8sClient.Get(context.TODO(), key, rollout)).NotTo(HaveOccurred()) + if rollout.Status.Phase == v1beta1.RolloutPhaseHealthy { + return true + } + return rollout.Status.CanaryStatus != nil && + (rollout.Status.CanaryStatus.CurrentStepIndex != resumedStep || + rollout.Status.CanaryStatus.CurrentStepState != v1beta1.CanaryStepStatePaused) + }, 5*time.Minute, time.Second).Should(BeTrue()) +} + +func completeMinReadyE2ERollout(namespace, name string) { + Eventually(func() string { + rollout := &v1beta1.Rollout{} + key := types.NamespacedName{Namespace: namespace, Name: name} + if err := k8sClient.Get(context.TODO(), key, rollout); err != nil { + return fmt.Sprintf("get rollout failed: %v", err) + } + status := minReadyE2ERolloutStatus(rollout) + if rollout.Status.Phase == v1beta1.RolloutPhaseHealthy { + return status + } + if rollout.Status.CanaryStatus == nil || + rollout.Status.CanaryStatus.CurrentStepState != v1beta1.CanaryStepStatePaused { + return status + } + body := fmt.Sprintf(`{"status":{"canaryStatus":{"currentStepState":"%s"}}}`, v1beta1.CanaryStepStateReady) + if err := k8sClient.Status().Patch(context.TODO(), rollout, client.RawPatch(types.MergePatchType, []byte(body))); err != nil { + return fmt.Sprintf("%s patch ready failed: %v", status, err) + } + return status + }, 10*time.Minute, time.Second).Should(HavePrefix(fmt.Sprintf("phase=%s", v1beta1.RolloutPhaseHealthy))) +} + +func waitMinReadyE2ERolloutPhase(namespace, name string, phase v1beta1.RolloutPhase) { + Eventually(func() string { + rollout := &v1beta1.Rollout{} + key := types.NamespacedName{Namespace: namespace, Name: name} + if err := k8sClient.Get(context.TODO(), key, rollout); err != nil { + return fmt.Sprintf("get 
rollout failed: %v", err) + } + return minReadyE2ERolloutStatus(rollout) + }, 10*time.Minute, time.Second).Should(HavePrefix(fmt.Sprintf("phase=%s", phase))) +} + +func minReadyE2ERolloutStatus(rollout *v1beta1.Rollout) string { + status := fmt.Sprintf( + "phase=%s rolloutStep=%d rolloutState=%s message=%q", + rollout.Status.Phase, + rollout.Status.CurrentStepIndex, + rollout.Status.CurrentStepState, + rollout.Status.Message, + ) + if rollout.Status.CanaryStatus == nil { + return status + " canary=" + } + canary := rollout.Status.CanaryStatus + return fmt.Sprintf( + "%s canaryStep=%d nextStep=%d canaryState=%s canaryMessage=%q", + status, + canary.CurrentStepIndex, + canary.NextStepIndex, + canary.CurrentStepState, + canary.Message, + ) +} + +func waitMinReadyE2EDeploymentInflated(namespace string) { + Eventually(func() bool { + deployment := &apps.Deployment{} + key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} + Expect(k8sClient.Get(context.TODO(), key, deployment)).NotTo(HaveOccurred()) + return deployment.Spec.Strategy.Type == apps.RollingUpdateDeploymentStrategyType && + deployment.Spec.MinReadySeconds == partitiondeployment.InflatedMinReadySeconds && + deployment.Spec.Strategy.RollingUpdate != nil + }, 5*time.Minute, time.Second).Should(BeTrue()) +} + +func waitMinReadyE2EDeploymentRestored(namespace string) { + Eventually(func() bool { + deployment := &apps.Deployment{} + key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} + Expect(k8sClient.Get(context.TODO(), key, deployment)).NotTo(HaveOccurred()) + return deployment.Spec.MinReadySeconds == 0 && + deployment.Spec.Strategy.Type == apps.RollingUpdateDeploymentStrategyType && + deployment.Annotations[partitiondeployment.AnnotationOriginalMinReadySeconds] == "" + }, 10*time.Minute, time.Second).Should(BeTrue()) +} + +func waitMinReadyE2EBatchCondition(namespace, name, reason string) { + Eventually(func() bool { + release := 
&v1beta1.BatchRelease{} + key := types.NamespacedName{Namespace: namespace, Name: name} + if err := k8sClient.Get(context.TODO(), key, release); err != nil { + if apierrors.IsNotFound(err) { + return false + } + Expect(err).NotTo(HaveOccurred()) + } + for _, condition := range release.Status.Conditions { + if condition.Reason == reason { + return true + } + } + return false + }, 5*time.Minute, time.Second).Should(BeTrue()) +} + +func startMinReadyE2ERollout(namespace string) *v1beta1.Rollout { + rollout := newMinReadyE2ERollout(namespace) + deployment := newMinReadyE2EDeployment(namespace) + createReadyMinReadyE2EDeployment(namespace, deployment) + createHealthyMinReadyE2ERollout(namespace, rollout) + updateMinReadyE2EDeploymentVersion(namespace, "version2") + waitMinReadyE2EDeploymentInflated(namespace) + waitMinReadyE2EBatchCondition(namespace, rollout.Name, "MinReadyInitialized") + return rollout +} diff --git a/test/e2e/minready/deployment_minready_pdb_test.go b/test/e2e/minready/deployment_minready_pdb_test.go new file mode 100644 index 00000000..78ceb57d --- /dev/null +++ b/test/e2e/minready/deployment_minready_pdb_test.go @@ -0,0 +1,71 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package minready + +import ( + "context" + "time" + + . "github.com/onsi/ginkgo" + . 
"github.com/onsi/gomega" + apps "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + policyv1 "k8s.io/api/policy/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/openkruise/rollouts/api/v1beta1" + partitiondeployment "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/deployment" +) + +var _ = SIGDescribe("Deployment MinReadySeconds PDB", func() { + var namespace string + + BeforeEach(func() { + namespace = randomNamespaceName("deployment-minready-pdb") + ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespace}} + Expect(k8sClient.Create(context.TODO(), ns)).Should(Succeed()) + }) + + AfterEach(func() { + _ = k8sClient.DeleteAllOf(context.TODO(), &v1beta1.BatchRelease{}, client.InNamespace(namespace)) + _ = k8sClient.DeleteAllOf(context.TODO(), &v1beta1.Rollout{}, client.InNamespace(namespace)) + _ = k8sClient.DeleteAllOf(context.TODO(), &policyv1.PodDisruptionBudget{}, client.InNamespace(namespace)) + _ = k8sClient.DeleteAllOf(context.TODO(), &apps.Deployment{}, client.InNamespace(namespace)) + Expect(k8sClient.Delete(context.TODO(), &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespace}})).Should(Succeed()) + time.Sleep(3 * time.Second) + }) + + KruiseDescribe("MinReadySeconds PDB coexistence", func() { + It("continues rollout initialization and leaves Deployment strategy untouched", func() { + rollout := newMinReadyE2ERollout(namespace) + deployment := newMinReadyE2EDeployment(namespace) + pdb := newMinReadyE2EPDB(namespace) + createReadyMinReadyE2EDeployment(namespace, deployment) + createMinReadyE2EObject(pdb) + + createHealthyMinReadyE2ERollout(namespace, rollout) + updateMinReadyE2EDeploymentVersion(namespace, "version2") + waitMinReadyE2EBatchCondition(namespace, rollout.Name, "MinReadyInitialized") + + got := &apps.Deployment{} + Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(deployment), got)).Should(Succeed()) + 
Expect(got.Spec.Strategy.Type).Should(Equal(apps.RollingUpdateDeploymentStrategyType)) + Expect(got.Spec.MinReadySeconds).Should(Equal(partitiondeployment.InflatedMinReadySeconds)) + }) + }) +}) diff --git a/test/e2e/minready/deployment_minready_scenarios_helper_test.go b/test/e2e/minready/deployment_minready_scenarios_helper_test.go new file mode 100644 index 00000000..47cc6fbf --- /dev/null +++ b/test/e2e/minready/deployment_minready_scenarios_helper_test.go @@ -0,0 +1,176 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package minready + +import ( + "context" + "fmt" + "time" + + . 
"github.com/onsi/gomega" + apps "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/util/retry" + "k8s.io/utils/pointer" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/openkruise/rollouts/api/v1beta1" + partitiondeployment "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/deployment" +) + +func waitMinReadyE2EDeploymentReplicas(namespace string, replicas int32) { + Eventually(func() bool { + deployment := &apps.Deployment{} + key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} + Expect(k8sClient.Get(context.TODO(), key, deployment)).Should(Succeed()) + return *deployment.Spec.Replicas == replicas && + deployment.Status.ObservedGeneration >= deployment.Generation + }, 5*time.Minute, time.Second).Should(BeTrue()) +} + +func waitMinReadyE2EOriginalAvailabilityAnnotations(namespace string, minReadySeconds, progressDeadlineSeconds int32) { + Eventually(func() bool { + deployment := &apps.Deployment{} + key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} + Expect(k8sClient.Get(context.TODO(), key, deployment)).Should(Succeed()) + return deployment.Annotations[partitiondeployment.AnnotationOriginalMinReadySeconds] == fmt.Sprintf("%d", minReadySeconds) && + deployment.Annotations[partitiondeployment.AnnotationOriginalProgressDeadlineSeconds] == fmt.Sprintf("%d", progressDeadlineSeconds) + }, 5*time.Minute, time.Second).Should(BeTrue()) +} + +func waitMinReadyE2EDeploymentRestoredWithAvailability(namespace string, minReadySeconds, progressDeadlineSeconds int32) { + Eventually(func() bool { + deployment := &apps.Deployment{} + key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} + Expect(k8sClient.Get(context.TODO(), key, deployment)).Should(Succeed()) + if deployment.Spec.ProgressDeadlineSeconds == nil || *deployment.Spec.ProgressDeadlineSeconds 
!= progressDeadlineSeconds { + return false + } + for _, key := range partitiondeployment.AllOriginalAnnotations { + if deployment.Annotations[key] != "" { + return false + } + } + return deployment.Spec.MinReadySeconds == minReadySeconds && + deployment.Spec.Strategy.Type == apps.RollingUpdateDeploymentStrategyType + }, 10*time.Minute, time.Second).Should(BeTrue()) +} + +func deleteMinReadyE2ERollout(namespace, name string) { + rollout := &v1beta1.Rollout{} + key := types.NamespacedName{Namespace: namespace, Name: name} + Expect(k8sClient.Get(context.TODO(), key, rollout)).Should(Succeed()) + Expect(k8sClient.Delete(context.TODO(), rollout)).Should(Succeed()) +} + +// waitMinReadyE2ERolloutDeleted polls once per second, for up to five minutes, +// until Get on the named Rollout returns a NotFound error. Any other Get error +// does not count as deletion; polling simply continues. +func waitMinReadyE2ERolloutDeleted(namespace, name string) { + Eventually(func() bool { + rollout := &v1beta1.Rollout{} + key := types.NamespacedName{Namespace: namespace, Name: name} + err := k8sClient.Get(context.TODO(), key, rollout) + // IgnoreNotFound maps NotFound to nil and passes every other error through, + // so this is true only when the Get failed specifically with NotFound. + return err != nil && client.IgnoreNotFound(err) == nil + }, 5*time.Minute, time.Second).Should(BeTrue()) +} + +func restoreMinReadyE2EOriginalAnnotation(namespace, key, value string) { + Expect(retry.RetryOnConflict(retry.DefaultRetry, func() error { + deployment := &apps.Deployment{} + namespacedName := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} + if err := k8sClient.Get(context.TODO(), namespacedName, deployment); err != nil { + return err + } + if deployment.Annotations == nil { + deployment.Annotations = map[string]string{} + } + deployment.Annotations[key] = value + return k8sClient.Update(context.TODO(), deployment) + })).NotTo(HaveOccurred()) +} + +func waitMinReadyE2EWebhookEndpointReady() { + Eventually(func() bool { + endpoints := &corev1.Endpoints{} + key := types.NamespacedName{Namespace: "kruise-rollout", Name: "kruise-rollout-webhook-service"} + if err := k8sClient.Get(context.TODO(), key, endpoints); err != nil { + return false + } + for _, subset := range endpoints.Subsets { + if len(subset.Addresses) > 0 { + return true + } + } + return false + }, 5*time.Minute, 
time.Second).Should(BeTrue()) +} + +func waitMinReadyE2EBatchMetricCondition(namespace, name, reason string) { + waitMinReadyE2EBatchCondition(namespace, name, reason) +} + +func waitMinReadyE2EEventReason(namespace, reason string) { + Eventually(func() bool { + events := &corev1.EventList{} + Expect(k8sClient.List(context.TODO(), events, client.InNamespace(namespace))).Should(Succeed()) + for _, event := range events.Items { + if event.Reason == reason { + return true + } + } + return false + }, 5*time.Minute, time.Second).Should(BeTrue()) +} + +func makeMinReadyE2ERolloutWithReplicas(namespace string, values ...string) *v1beta1.Rollout { + rollout := newMinReadyE2ERollout(namespace) + steps := make([]v1beta1.CanaryStep, 0, len(values)) + for _, value := range values { + steps = append(steps, v1beta1.CanaryStep{Replicas: intstrFromStringPtr(value), Pause: v1beta1.RolloutPause{}}) + } + rollout.Spec.Strategy.Canary.Steps = steps + return rollout +} + +func intstrFromStringPtr(value string) *intstr.IntOrString { + parsed := intstr.FromString(value) + return &parsed +} + +func expectMinReadyE2EInflatedMaxUnavailable(namespace string, want int32) { + waitMinReadyE2EInflatedMaxUnavailable(namespace, want, 5*time.Minute) +} + +// waitMinReadyE2EInflatedMaxUnavailable polls once per second until the test +// Deployment's rolling-update maxUnavailable is the integer want, and fails the +// spec if that has not happened within timeout. +func waitMinReadyE2EInflatedMaxUnavailable(namespace string, want int32, timeout time.Duration) { + Eventually(func() bool { + deployment := &apps.Deployment{} + key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} + Expect(k8sClient.Get(context.TODO(), key, deployment)).Should(Succeed()) + // RollingUpdate and MaxUnavailable are pointers; keep polling instead of + // panicking while either one is unset. + rolling := deployment.Spec.Strategy.RollingUpdate + if rolling == nil || rolling.MaxUnavailable == nil { + return false + } + // IntVal is only meaningful for int-typed values, so a percentage string + // never matches. + got := rolling.MaxUnavailable + return got.Type == intstr.Int && got.IntVal == want + }, timeout, time.Second).Should(BeTrue(), fmt.Sprintf("want maxUnavailable %d", want)) +} + +func expectMinReadyE2EOriginalAnnotationAbsent(namespace string) { + deployment := &apps.Deployment{} + key := types.NamespacedName{Namespace: namespace, Name: minReadyE2EDeploymentName} + Expect(k8sClient.Get(context.TODO(), key, 
deployment)).Should(Succeed()) + Expect(deployment.Annotations[partitiondeployment.AnnotationOriginalMinReadySeconds]).Should(Equal("")) +} + +func setMinReadyE2EInitialReplicas(deployment *apps.Deployment, replicas int32) { + deployment.Spec.Replicas = pointer.Int32(replicas) +} diff --git a/test/e2e/minready/deployment_minready_test.go b/test/e2e/minready/deployment_minready_test.go new file mode 100644 index 00000000..15aecbd6 --- /dev/null +++ b/test/e2e/minready/deployment_minready_test.go @@ -0,0 +1,155 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package minready + +import ( + "context" + "time" + + . "github.com/onsi/ginkgo" + . 
"github.com/onsi/gomega" + apps "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/openkruise/rollouts/api/v1beta1" + partitiondeployment "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/deployment" +) + +var _ = SIGDescribe("Deployment MinReadySeconds", func() { + var namespace string + + BeforeEach(func() { + namespace = randomNamespaceName("deployment-minready") + ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespace}} + Expect(k8sClient.Create(context.TODO(), ns)).Should(Succeed()) + }) + + AfterEach(func() { + _ = k8sClient.DeleteAllOf(context.TODO(), &v1beta1.BatchRelease{}, client.InNamespace(namespace)) + _ = k8sClient.DeleteAllOf(context.TODO(), &v1beta1.Rollout{}, client.InNamespace(namespace)) + _ = k8sClient.DeleteAllOf(context.TODO(), &apps.Deployment{}, client.InNamespace(namespace)) + Expect(k8sClient.Delete(context.TODO(), &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespace}})).Should(Succeed()) + time.Sleep(3 * time.Second) + }) + + KruiseDescribe("MinReadySeconds deployment rollout", func() { + It("TC1 normal rollout keeps RollingUpdate and restores original fields", func() { + rollout := startMinReadyE2ERollout(namespace) + finishMinReadyE2ERollout(namespace, rollout.Name) + }) + + It("TC2 rollback returns to the stable template", func() { + rollout := newMinReadyE2ERollout(namespace) + deployment := newMinReadyE2EDeployment(namespace) + createReadyMinReadyE2EDeployment(namespace, deployment) + createHealthyMinReadyE2ERollout(namespace, rollout) + + updateMinReadyE2EDeploymentVersion(namespace, "version2") + waitMinReadyE2EDeploymentInflated(namespace) + waitMinReadyE2ERolloutStepPaused(namespace, rollout.Name, 1) + updateMinReadyE2EDeploymentVersion(namespace, "version1") + waitMinReadyE2ERolloutPhase(namespace, rollout.Name, v1beta1.RolloutPhaseHealthy) + 
waitMinReadyE2EDeploymentRestored(namespace) + + expectMinReadyE2EDeploymentVersion(namespace, "version1") + }) + + It("TC3 continuous release rolls v1 to v2 to v3 and refreshes original availability fields", func() { + rollout := startMinReadyE2ERollout(namespace) + waitMinReadyE2ERolloutStepPaused(namespace, rollout.Name, 1) + + updateMinReadyE2EDeploymentContinuousRelease(namespace, "version3", 9, 90) + waitMinReadyE2EDeploymentInflated(namespace) + waitMinReadyE2EOriginalAvailabilityAnnotations(namespace, 9, 90) + expectMinReadyE2EDeploymentVersion(namespace, "version3") + + finishMinReadyE2ERolloutWithAvailability(namespace, rollout.Name, 9, 90) + }) + + It("TC4 controller restart resumes from the persisted MinReadySeconds state", func() { + rollout := startMinReadyE2ERollout(namespace) + waitMinReadyE2ERolloutStepPaused(namespace, rollout.Name, 1) + restartMinReadyE2EControllerManager() + + waitMinReadyE2EDeploymentInflated(namespace) + waitMinReadyE2ERolloutStepPaused(namespace, rollout.Name, 1) + finishMinReadyE2ERollout(namespace, rollout.Name) + }) + + It("TC5 scale changes remain safe while rollout is active", func() { + rollout := makeMinReadyE2ERolloutWithReplicas(namespace, "25%", "50%", "100%") + deployment := newMinReadyE2EDeployment(namespace) + setMinReadyE2EInitialReplicas(deployment, 4) + createReadyMinReadyE2EDeployment(namespace, deployment) + createHealthyMinReadyE2ERollout(namespace, rollout) + + updateMinReadyE2EDeploymentVersion(namespace, "version2") + waitMinReadyE2EDeploymentInflated(namespace) + waitMinReadyE2ERolloutStepPaused(namespace, rollout.Name, 1) + patchMinReadyE2EDeploymentReplicas(namespace, 8) + waitMinReadyE2EDeploymentReplicas(namespace, 8) + waitMinReadyE2ERolloutStepPaused(namespace, rollout.Name, 1) + resumeMinReadyE2ERollout(namespace, rollout.Name) + expectMinReadyE2EInflatedMaxUnavailable(namespace, 4) + finishMinReadyE2ERollout(namespace, rollout.Name) + }) + + It("TC6 deleting Rollout restores annotations and lets 
native RollingUpdate continue", func() { + rollout := startMinReadyE2ERollout(namespace) + deleteMinReadyE2ERollout(namespace, rollout.Name) + + waitMinReadyE2ERolloutDeleted(namespace, rollout.Name) + waitMinReadyE2EDeploymentRestored(namespace) + expectMinReadyE2EOriginalAnnotationAbsent(namespace) + }) + + It("TC7 external maxUnavailable drift converges to the current batch target", func() { + // 4 steps: 60% keeps target=3 on 5 replicas so resume does not jump to target=5. + rollout := makeMinReadyE2ERolloutWithReplicas(namespace, "20%", "50%", "60%", "100%") + deployment := newMinReadyE2EDeployment(namespace) + createReadyMinReadyE2EDeployment(namespace, deployment) + createHealthyMinReadyE2ERollout(namespace, rollout) + updateMinReadyE2EDeploymentVersion(namespace, "version2") + waitMinReadyE2EDeploymentInflated(namespace) + waitMinReadyE2EBatchCondition(namespace, rollout.Name, "MinReadyInitialized") + + waitMinReadyE2ERolloutStepPaused(namespace, rollout.Name, 1) + patchMinReadyE2EMaxUnavailable(namespace, 5) + // Heal drift to the paused step's batch target (20% on 5 replicas => 1). + expectMinReadyE2EInflatedMaxUnavailable(namespace, 1) + // Observe the next batch target before the rollout can race ahead to a later batch. + markMinReadyE2ERolloutPausedStepReady(namespace, rollout.Name) + // 50% batch target is also 3 on 5 replicas; wait for UpgradeBatch, not step 2 pause. 
+ waitMinReadyE2EInflatedMaxUnavailable(namespace, 3, 10*time.Minute) + finishMinReadyE2ERollout(namespace, rollout.Name) + }) + + It("TC8 missing annotation blocks finalize until the operator restores it", func() { + rollout := startMinReadyE2ERollout(namespace) + waitMinReadyE2ERolloutStepPaused(namespace, rollout.Name, 1) + deleteMinReadyE2EOriginalAnnotation(namespace, partitiondeployment.AnnotationOriginalMaxUnavailable) + resumeMinReadyE2ERollout(namespace, rollout.Name) + waitMinReadyE2EBatchCondition(namespace, rollout.Name, "MinReadyDegradedMissingAnnotations") + waitMinReadyE2EEventReason(namespace, "MinReadyDegradedMissingAnnotations") + + restoreMinReadyE2EOriginalAnnotation(namespace, partitiondeployment.AnnotationOriginalMaxUnavailable, "25%") + finishMinReadyE2ERollout(namespace, rollout.Name) + }) + }) +}) diff --git a/test/e2e/minready/suite_test.go b/test/e2e/minready/suite_test.go new file mode 100644 index 00000000..f9e5c14f --- /dev/null +++ b/test/e2e/minready/suite_test.go @@ -0,0 +1,111 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package minready + +import ( + "context" + "fmt" + "math/rand" + "os" + "path/filepath" + "strconv" + "testing" + "time" + + . "github.com/onsi/ginkgo" + . 
"github.com/onsi/gomega" + kruisev1alpha1 "github.com/openkruise/kruise-api/apps/v1alpha1" + kruisev1beta1 "github.com/openkruise/kruise-api/apps/v1beta1" + crdv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/config" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + gatewayv1beta1 "sigs.k8s.io/gateway-api/apis/v1beta1" + "sigs.k8s.io/yaml" + + rolloutapi "github.com/openkruise/rollouts/api" +) + +var k8sClient client.Client +var scheme = runtime.NewScheme() + +func TestMinReadyE2E(t *testing.T) { + RegisterFailHandler(Fail) + + RunSpecsWithDefaultAndCustomReporters(t, + "Deployment MinReadySeconds E2E Suite", []Reporter{}) +} + +var _ = BeforeSuite(func(done Done) { + By("Bootstrapping MinReady test environment") + rand.Seed(time.Now().UnixNano()) + logf.SetLogger(zap.New(zap.UseDevMode(true), zap.WriteTo(GinkgoWriter))) + err := clientgoscheme.AddToScheme(scheme) + Expect(err).Should(BeNil()) + err = rolloutapi.AddToScheme(scheme) + Expect(err).Should(BeNil()) + err = crdv1.AddToScheme(scheme) + Expect(err).Should(BeNil()) + err = kruisev1beta1.AddToScheme(scheme) + Expect(err).Should(BeNil()) + err = kruisev1alpha1.AddToScheme(scheme) + Expect(err).Should(BeNil()) + err = gatewayv1beta1.AddToScheme(scheme) + Expect(err).Should(BeNil()) + By("Setting up kubernetes client") + k8sClient, err = client.New(config.GetConfigOrDie(), client.Options{Scheme: scheme}) + if err != nil { + logf.Log.Error(err, "failed to create k8sClient") + Fail("setup failed") + } + By("Create the CRDs") + var httprouteCRD crdv1.CustomResourceDefinition + err = readYamlToObject("../test_data/crds/httproutes.yaml", &httprouteCRD) + Expect(err).Should(BeNil()) + err = k8sClient.Create(context.TODO(), &httprouteCRD) + 
if errors.IsAlreadyExists(err) { + err = nil + } + Expect(err).Should(BeNil()) + + close(done) + By("Finished setting up MinReady test environment") +}, 300) + +func readYamlToObject(path string, object runtime.Object) error { + data, err := os.ReadFile(filepath.Clean(path)) + if err != nil { + return err + } + return yaml.Unmarshal(data, object) +} + +func randomNamespaceName(basic string) string { + return fmt.Sprintf("%s-%s", basic, strconv.FormatInt(rand.Int63(), 16)) +} + +func SIGDescribe(text string, body func()) bool { + return Describe("[rollouts] "+text, body) +} + +func KruiseDescribe(text string, body func()) bool { + return Describe("[kruise.io] "+text, body) +} diff --git a/test/integration/concurrency_test.go b/test/integration/concurrency_test.go new file mode 100644 index 00000000..7e680a4c --- /dev/null +++ b/test/integration/concurrency_test.go @@ -0,0 +1,120 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package integration + +import ( + "strings" + "testing" + + apps "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/tools/record" + "k8s.io/utils/pointer" + + "github.com/openkruise/rollouts/api/v1beta1" + partitiondeployment "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/deployment" + "github.com/openkruise/rollouts/pkg/feature" + "github.com/openkruise/rollouts/pkg/util" + utilfeature "github.com/openkruise/rollouts/pkg/util/feature" +) + +func TestDeploymentMinReadyConcurrentScaleUsesLatestReplicas(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + release := newIntegrationMinReadyRelease() + release.Status.CanaryStatus.CurrentBatch = 1 + deployment := newInflatedIntegrationDeployment() + deployment.Spec.Replicas = pointer.Int32(20) + deployment.Status.Replicas = 20 + deployment.Status.UpdatedReplicas = 10 + deployment.Status.ReadyReplicas = 10 + recorder := record.NewFakeRecorder(20) + cli := newIntegrationClient(release, deployment) + status := release.Status.DeepCopy() + control := newIntegrationMinReadyControl(cli, recorder, release, status, deployment.Name) + + if err := control.UpgradeBatch(); err != nil { + t.Fatalf("UpgradeBatch failed: %v", err) + } + + got := fetchIntegrationDeployment(t, cli, deployment) + // P0-3 sliding window: UpgradeBatch advances maxUnavailable one step at a + // time using the original budget (25%), not the full batch target in one + // patch. First step = 25% * 20 = 5 (computed from the scaled-up replica + // count, not the pre-scale 10 which would give 25% * 10 = 2); the batch + // target 50% * 20 = 10 is reached over subsequent reconciles as the window + // fills. This still asserts the "uses latest replicas" intent: the step is + // derived from the current replica count (20), not a stale one. 
+ if unavailable := got.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.IntVal != 5 { + t.Fatalf("maxUnavailable = %v, want 5 (sliding-window first step = 25%% of 20 replicas) after scale to 20 replicas", unavailable) + } + assertIntegrationCondition(t, status, v1beta1.RolloutConditionMinReadyBatching, corev1.ConditionTrue, "MinReadyBatching") +} + +func TestDeploymentMinReadyConcurrentMaxUnavailableAboveTargetSelfHeals(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + release := newIntegrationMinReadyRelease() + release.Status.CanaryStatus.CurrentBatch = 1 + deployment := newInflatedIntegrationDeployment() + driftedMaxUnavailable := intstr.FromInt(6) + deployment.Spec.Strategy.RollingUpdate.MaxUnavailable = &driftedMaxUnavailable + deployment.Status.Replicas = 10 + deployment.Status.UpdatedReplicas = 5 + deployment.Status.ReadyReplicas = 5 + recorder := record.NewFakeRecorder(20) + cli := newIntegrationClient(release, deployment) + status := release.Status.DeepCopy() + control := newIntegrationMinReadyControl(cli, recorder, release, status, deployment.Name) + + err := control.UpgradeBatch() + if err != nil { + t.Fatalf("UpgradeBatch failed: %v", err) + } + + got := fetchIntegrationDeployment(t, cli, deployment) + if unavailable := got.Spec.Strategy.RollingUpdate.MaxUnavailable; unavailable == nil || unavailable.IntVal != 5 { + t.Fatalf("maxUnavailable = %v, want target value 5", unavailable) + } + if degraded := util.GetBatchReleaseCondition(*status, v1beta1.RolloutConditionMinReadyDegraded); degraded != nil { + t.Fatalf("degraded condition = %v, want nil", degraded) + } +} + +func TestDeploymentMinReadyConcurrentAnnotationDeletionBlocksFinalize(t *testing.T) { + _ = utilfeature.DefaultMutableFeatureGate.Set(string(feature.MinReadySecondsStrategy) + "=true") + release := newIntegrationMinReadyRelease() + deployment := newInflatedIntegrationDeployment() + 
delete(deployment.Annotations, partitiondeployment.AnnotationOriginalMaxUnavailable) + recorder := record.NewFakeRecorder(20) + cli := newIntegrationClient(release, deployment) + status := release.Status.DeepCopy() + control := newIntegrationMinReadyControl(cli, recorder, release, status, deployment.Name) + + err := control.Finalize() + if err == nil || !strings.Contains(err.Error(), partitiondeployment.AnnotationOriginalMaxUnavailable) { + t.Fatalf("Finalize error = %v, want missing annotation", err) + } + + got := fetchIntegrationDeployment(t, cli, deployment) + if got.Spec.MinReadySeconds != partitiondeployment.InflatedMinReadySeconds { + t.Fatalf("minReadySeconds = %d, want inflated value preserved", got.Spec.MinReadySeconds) + } + assertIntegrationCondition(t, status, v1beta1.RolloutConditionMinReadyDegraded, corev1.ConditionTrue, "MinReadyDegradedMissingAnnotations") + assertIntegrationEvent(t, recorder, "MinReadyDegradedMissingAnnotations") +} + +var _ = apps.RollingUpdateDeploymentStrategyType diff --git a/test/integration/deployment_minready_test.go b/test/integration/deployment_minready_test.go new file mode 100644 index 00000000..a0f6c4c6 --- /dev/null +++ b/test/integration/deployment_minready_test.go @@ -0,0 +1,182 @@ +/* +Copyright 2026 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package integration
+
+import (
+	"strings"
+	"testing"
+
+	corev1 "k8s.io/api/core/v1"
+	policyv1 "k8s.io/api/policy/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/tools/record"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	"github.com/openkruise/rollouts/api/v1beta1"
+	partitiondeployment "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/deployment"
+	"github.com/openkruise/rollouts/pkg/feature"
+	utilfeature "github.com/openkruise/rollouts/pkg/util/feature"
+)
+
+// setMinReadyFeatureGate sets the MinReadySecondsStrategy feature gate to
+// enabled for the calling test and fails the test if the gate rejects the
+// value. The gate is process-wide state, so the value observed before the call
+// is put back in a cleanup to keep tests independent of execution order.
+// NOTE(review): Enabled is assumed to be safe to call here, i.e. the gate is
+// registered by importing pkg/feature -- confirm.
+func setMinReadyFeatureGate(t *testing.T, enabled bool) {
+	t.Helper()
+	gate := utilfeature.DefaultMutableFeatureGate
+	name := string(feature.MinReadySecondsStrategy)
+	setting := map[bool]string{true: name + "=true", false: name + "=false"}
+	previous := gate.Enabled(feature.MinReadySecondsStrategy)
+	if err := gate.Set(setting[enabled]); err != nil {
+		t.Fatalf("set feature gate %s failed: %v", setting[enabled], err)
+	}
+	t.Cleanup(func() {
+		if err := gate.Set(setting[previous]); err != nil {
+			t.Errorf("restore feature gate %s failed: %v", setting[previous], err)
+		}
+	})
+}
+
+// TestDeploymentMinReadyControlPlaneInitialize checks that Initialize inflates
+// the Deployment, stores the original minReadySeconds, progressDeadlineSeconds
+// and maxUnavailable in annotations, and reports MinReadyInitialized as both a
+// condition and an event.
+func TestDeploymentMinReadyControlPlaneInitialize(t *testing.T) {
+	setMinReadyFeatureGate(t, true)
+	release := newIntegrationMinReadyRelease()
+	deployment := newIntegrationDeployment()
+	recorder := record.NewFakeRecorder(20)
+	cli := newIntegrationClient(release, deployment)
+	status := release.Status.DeepCopy()
+	control := newIntegrationMinReadyControl(cli, recorder, release, status, deployment.Name)
+
+	if err := control.Initialize(); err != nil {
+		t.Fatalf("Initialize failed: %v", err)
+	}
+
+	got := fetchIntegrationDeployment(t, cli, deployment)
+	assertInflatedDeployment(t, got)
+	assertOriginalAnnotation(t, got, partitiondeployment.AnnotationOriginalMinReadySeconds, "5")
+	assertOriginalAnnotation(t, got, partitiondeployment.AnnotationOriginalProgressDeadlineSeconds, "60")
+	assertOriginalAnnotation(t, got, partitiondeployment.AnnotationOriginalMaxUnavailable, "25%")
+	assertIntegrationCondition(t, status, v1beta1.RolloutConditionMinReadyInitialized, corev1.ConditionTrue, "MinReadyInitialized")
+	assertIntegrationEvent(t, recorder, "MinReadyInitialized")
+}
+
+// TestDeploymentMinReadyControlPlaneRejectsFeatureGateDisabled checks that
+// Initialize returns a "feature gate is disabled" error while the
+// MinReadySecondsStrategy gate is off.
+func TestDeploymentMinReadyControlPlaneRejectsFeatureGateDisabled(t *testing.T) {
+	setMinReadyFeatureGate(t, false)
+	release := newIntegrationMinReadyRelease()
+	deployment := newIntegrationDeployment()
+	recorder := record.NewFakeRecorder(20)
+	cli := newIntegrationClient(release, deployment)
+	status := release.Status.DeepCopy()
+	control := newIntegrationMinReadyControl(cli, recorder, release, status, deployment.Name)
+
+	err := control.Initialize()
+	if err == nil || !strings.Contains(err.Error(), "feature gate is disabled") {
+		t.Fatalf("Initialize error = %v, want feature gate disabled", err)
+	}
+}
+
+// TestDeploymentMinReadyControlPlaneAllowsCoveringPDB checks that Initialize
+// still succeeds and inflates the Deployment when a PodDisruptionBudget in the
+// same namespace selects the Deployment's pods (app=demo).
+func TestDeploymentMinReadyControlPlaneAllowsCoveringPDB(t *testing.T) {
+	setMinReadyFeatureGate(t, true)
+	release := newIntegrationMinReadyRelease()
+	deployment := newIntegrationDeployment()
+	pdb := &policyv1.PodDisruptionBudget{
+		ObjectMeta: metav1.ObjectMeta{Name: "demo-pdb", Namespace: deployment.Namespace},
+		Spec: policyv1.PodDisruptionBudgetSpec{
+			Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "demo"}},
+		},
+	}
+	recorder := record.NewFakeRecorder(20)
+	cli := newIntegrationClient(release, deployment, pdb)
+	status := release.Status.DeepCopy()
+	control := newIntegrationMinReadyControl(cli, recorder, release, status, deployment.Name)
+
+	if err := control.Initialize(); err != nil {
+		t.Fatalf("Initialize failed: %v", err)
+	}
+
+	got := fetchIntegrationDeployment(t, cli, deployment)
+	assertInflatedDeployment(t, got)
+	assertIntegrationCondition(t, status, v1beta1.RolloutConditionMinReadyInitialized, corev1.ConditionTrue, "MinReadyInitialized")
+}
+
+// TestDeploymentMinReadyControlPlaneUpgradeBatchUsesUpdatedReadyReplicas checks
+// that, for batch index 1 (50% of 10 replicas) with 5 updated pods all ready,
+// UpgradeBatch sets maxUnavailable to 5 and EnsureBatchPodsReadyAndLabeled
+// succeeds and reports MinReadyBatchReady.
+func TestDeploymentMinReadyControlPlaneUpgradeBatchUsesUpdatedReadyReplicas(t *testing.T) {
+	setMinReadyFeatureGate(t, true)
+	release := newIntegrationMinReadyRelease()
+	release.Status.CanaryStatus.CurrentBatch = 1
+	deployment := newInflatedIntegrationDeployment()
+	deployment.Status.Replicas = 10
+	deployment.Status.UpdatedReplicas = 5
+	deployment.Status.ReadyReplicas = 5
+	rs := newIntegrationUpdatedReplicaSet(deployment, release.Status.UpdateRevision, 5, 5)
+	pods := newIntegrationUpdatedPods(deployment, rs, release.Status.UpdateRevision, "", 5, 5)
+	recorder := record.NewFakeRecorder(20)
+	cli := newIntegrationClient(appendIntegrationObjects([]client.Object{release, deployment, rs}, pods)...)
+	status := release.Status.DeepCopy()
+	control := newIntegrationMinReadyControl(cli, recorder, release, status, deployment.Name)
+
+	if err := control.UpgradeBatch(); err != nil {
+		t.Fatalf("UpgradeBatch failed: %v", err)
+	}
+	if err := control.EnsureBatchPodsReadyAndLabeled(); err != nil {
+		t.Fatalf("EnsureBatchPodsReadyAndLabeled failed: %v", err)
+	}
+
+	got := fetchIntegrationDeployment(t, cli, deployment)
+	// Fail cleanly instead of panicking if the strategy block was dropped.
+	rollingUpdate := got.Spec.Strategy.RollingUpdate
+	if rollingUpdate == nil {
+		t.Fatalf("strategy.rollingUpdate is nil, want maxUnavailable 5")
+	}
+	if unavailable := rollingUpdate.MaxUnavailable; unavailable == nil || unavailable.IntVal != 5 {
+		t.Fatalf("maxUnavailable = %v, want 5", unavailable)
+	}
+	assertIntegrationCondition(t, status, v1beta1.RolloutConditionMinReadyBatching, corev1.ConditionTrue, "MinReadyBatchReady")
+	assertIntegrationEvent(t, recorder, "MinReadyBatchReady")
+}
+
+// TestDeploymentMinReadyControlPlaneWaitsForUpdatedReadyReplicas checks that
+// EnsureBatchPodsReadyAndLabeled returns an error when only 1 of the 5 updated
+// pods is ready, even though the Deployment as a whole reports 10 ready
+// replicas.
+func TestDeploymentMinReadyControlPlaneWaitsForUpdatedReadyReplicas(t *testing.T) {
+	setMinReadyFeatureGate(t, true)
+	release := newIntegrationMinReadyRelease()
+	release.Status.CanaryStatus.CurrentBatch = 1
+	deployment := newInflatedIntegrationDeployment()
+	deployment.Status.Replicas = 10
+	deployment.Status.UpdatedReplicas = 5
+	deployment.Status.ReadyReplicas = 10
+	rs := newIntegrationUpdatedReplicaSet(deployment, release.Status.UpdateRevision, 5, 1)
+	pods := newIntegrationUpdatedPods(deployment, rs, release.Status.UpdateRevision, "", 5, 1)
+	recorder := record.NewFakeRecorder(20)
+	cli := newIntegrationClient(appendIntegrationObjects([]client.Object{release, deployment, rs}, pods)...)
+	status := release.Status.DeepCopy()
+	control := newIntegrationMinReadyControl(cli, recorder, release, status, deployment.Name)
+
+	if err := control.UpgradeBatch(); err != nil {
+		t.Fatalf("UpgradeBatch failed: %v", err)
+	}
+	if err := control.EnsureBatchPodsReadyAndLabeled(); err == nil {
+		t.Fatalf("EnsureBatchPodsReadyAndLabeled succeeded, want updated ready wait error")
+	}
+}
+
+// TestDeploymentMinReadyControlPlaneFinalizeRestoresOriginalFields checks that
+// Finalize puts minReadySeconds, progressDeadlineSeconds and maxUnavailable
+// back to their original values, leaves maxSurge at 1, removes every
+// original-value annotation, and reports MinReadyFinalized.
+func TestDeploymentMinReadyControlPlaneFinalizeRestoresOriginalFields(t *testing.T) {
+	setMinReadyFeatureGate(t, true)
+	release := newIntegrationMinReadyRelease()
+	deployment := newInflatedIntegrationDeployment()
+	recorder := record.NewFakeRecorder(20)
+	cli := newIntegrationClient(release, deployment)
+	status := release.Status.DeepCopy()
+	control := newIntegrationMinReadyControl(cli, recorder, release, status, deployment.Name)
+
+	if err := control.Finalize(); err != nil {
+		t.Fatalf("Finalize failed: %v", err)
+	}
+
+	got := fetchIntegrationDeployment(t, cli, deployment)
+	if got.Spec.MinReadySeconds != 5 {
+		t.Fatalf("minReadySeconds = %d, want 5", got.Spec.MinReadySeconds)
+	}
+	if got.Spec.ProgressDeadlineSeconds == nil || *got.Spec.ProgressDeadlineSeconds != 60 {
+		t.Fatalf("progressDeadlineSeconds = %v, want 60", got.Spec.ProgressDeadlineSeconds)
+	}
+	// Fail cleanly instead of panicking if the strategy block was dropped.
+	rollingUpdate := got.Spec.Strategy.RollingUpdate
+	if rollingUpdate == nil {
+		t.Fatalf("strategy.rollingUpdate is nil, want maxUnavailable 25%% and maxSurge 1")
+	}
+	if unavailable := rollingUpdate.MaxUnavailable; unavailable == nil || unavailable.StrVal != "25%" {
+		t.Fatalf("maxUnavailable = %v, want 25%%", unavailable)
+	}
+	if surge := rollingUpdate.MaxSurge; surge == nil || surge.IntVal != 1 {
+		t.Fatalf("maxSurge = %v, want 1", surge)
+	}
+	for _, key := range partitiondeployment.AllOriginalAnnotations {
+		if _, ok := got.Annotations[key]; ok {
+			t.Fatalf("annotation %s still exists", key)
+		}
+	}
+	assertIntegrationCondition(t, status, v1beta1.RolloutConditionMinReadyFinalized, corev1.ConditionTrue, "MinReadyFinalized")
+	assertIntegrationEvent(t, recorder, "MinReadyFinalized")
+}
diff --git a/test/integration/minready_helpers_test.go b/test/integration/minready_helpers_test.go
new file mode 100644
index 00000000..b2158ff0
--- /dev/null
+++ b/test/integration/minready_helpers_test.go
@@ -0,0 +1,357 @@
+/*
+Copyright 2026 The Kruise Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package integration
+
+import (
+	"context"
+	"strings"
+	"testing"
+	"time"
+
+	apps "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	policyv1 "k8s.io/api/policy/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/apimachinery/pkg/util/intstr"
+	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
+	"k8s.io/client-go/tools/record"
+	"k8s.io/utils/pointer"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/client/fake"
+
+	rolloutapi "github.com/openkruise/rollouts/api"
+	"github.com/openkruise/rollouts/api/v1beta1"
+	"github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle"
+	partitiondeployment "github.com/openkruise/rollouts/pkg/controller/batchrelease/control/partitionstyle/deployment"
+)
+
+// integrationScheme is the scheme handed to every fake client built by
+// newIntegrationClient; init registers the API types the tests store.
+var integrationScheme = runtime.NewScheme()
+
+// init registers the apps, core, policy and rollouts API packages with
+// integrationScheme and panics (via utilruntime.Must) if any registration fails.
+func init() {
+	utilruntime.Must(apps.AddToScheme(integrationScheme))
+	utilruntime.Must(corev1.AddToScheme(integrationScheme))
+	utilruntime.Must(policyv1.AddToScheme(integrationScheme))
+	utilruntime.Must(rolloutapi.AddToScheme(integrationScheme))
+}
+
+// newIntegrationMinReadyRelease returns a BatchRelease in the Preparing phase
+// that references the "demo" Deployment and uses the partition rolling style
+// with three batches (20%, 50%, 100%).
+func newIntegrationMinReadyRelease() *v1beta1.BatchRelease {
+	return &v1beta1.BatchRelease{
+		TypeMeta: metav1.TypeMeta{APIVersion: v1beta1.GroupVersion.String(), Kind: "BatchRelease"},
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "demo-release",
+			Namespace: "default",
+		},
+		Spec: v1beta1.BatchReleaseSpec{
+			WorkloadRef: v1beta1.ObjectRef{APIVersion: apps.SchemeGroupVersion.String(), Kind: "Deployment", Name: "demo"},
+			ReleasePlan: v1beta1.ReleasePlan{
+				RollingStyle: v1beta1.PartitionRollingStyle,
+				Batches: []v1beta1.ReleaseBatch{
+					{CanaryReplicas: intstr.FromString("20%")},
+					{CanaryReplicas: intstr.FromString("50%")},
+					{CanaryReplicas: intstr.FromString("100%")},
+				},
+			},
+		},
+		Status: v1beta1.BatchReleaseStatus{
+			Phase:          v1beta1.RolloutPhasePreparing,
+			StableRevision: "stable",
+			UpdateRevision: "updated",
+		},
+	}
+}
+
+// newIntegrationDeployment returns the un-inflated "demo" Deployment: 10
+// replicas, minReadySeconds 5, progressDeadlineSeconds 60, maxUnavailable 25%
+// and maxSurge 1, with all 10 replicas reported ready and none updated.
+func newIntegrationDeployment() *apps.Deployment {
+	progressDeadlineSeconds := int32(60)
+	maxUnavailable := intstr.FromString("25%")
+	maxSurge := intstr.FromInt(1)
+	return &apps.Deployment{
+		TypeMeta: metav1.TypeMeta{APIVersion: apps.SchemeGroupVersion.String(), Kind: "Deployment"},
+		ObjectMeta: metav1.ObjectMeta{
+			Name:            "demo",
+			Namespace:       "default",
+			ResourceVersion: "1",
+			UID:             types.UID("integration-deployment-uid"),
+			Labels:          map[string]string{"app": "demo"},
+		},
+		Spec: apps.DeploymentSpec{
+			Replicas:                pointer.Int32(10),
+			Selector:                &metav1.LabelSelector{MatchLabels: map[string]string{"app": "demo"}},
+			Template:                newIntegrationPodTemplate(),
+			MinReadySeconds:         5,
+			ProgressDeadlineSeconds: &progressDeadlineSeconds,
+			Strategy: apps.DeploymentStrategy{
+				Type: apps.RollingUpdateDeploymentStrategyType,
+				RollingUpdate: &apps.RollingUpdateDeployment{
+					MaxUnavailable: &maxUnavailable,
+					MaxSurge:       &maxSurge,
+				},
+			},
+		},
+		Status: apps.DeploymentStatus{
+			Replicas:          10,
+			ReadyReplicas:     10,
+			UpdatedReplicas:   0,
+			AvailableReplicas: 10,
+		},
+	}
+}
+
+// newInflatedIntegrationDeployment returns the "demo" Deployment with the
+// inflated minReadySeconds and progressDeadlineSeconds, maxUnavailable 0, and
+// the original values (5, 60, 25%) stored in the original-value annotations.
+func newInflatedIntegrationDeployment() *apps.Deployment {
+	deployment := newIntegrationDeployment()
+	progressDeadlineSeconds := partitiondeployment.InflatedProgressDeadlineSeconds
+	maxUnavailable := intstr.FromInt(0)
+	deployment.Spec.MinReadySeconds = partitiondeployment.InflatedMinReadySeconds
+	deployment.Spec.ProgressDeadlineSeconds = &progressDeadlineSeconds
+	deployment.Spec.Strategy.RollingUpdate.MaxUnavailable = &maxUnavailable
+	deployment.Annotations = map[string]string{
+		partitiondeployment.AnnotationOriginalMinReadySeconds:         "5",
+		partitiondeployment.AnnotationOriginalProgressDeadlineSeconds: "60",
+		partitiondeployment.AnnotationOriginalMaxUnavailable:          "25%",
+	}
+	return deployment
+}
+
+// newIntegrationPodTemplate returns the pod template used by the Deployment
+// fixture: label app=demo and a single "main" container running busybox:v2.
+func newIntegrationPodTemplate() corev1.PodTemplateSpec {
+	return corev1.PodTemplateSpec{
+		ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{"app": "demo"}},
+		Spec: corev1.PodSpec{Containers: []corev1.Container{{
+			Name:  "main",
+			Image: "busybox:v2",
+		}}},
+	}
+}
+
+// newIntegrationClient builds a fake client over integrationScheme that is
+// seeded with objects and has the status subresource enabled for BatchRelease.
+func newIntegrationClient(objects ...client.Object) client.Client {
+	return fake.NewClientBuilder().
+		WithScheme(integrationScheme).
+		WithObjects(objects...).
+		WithStatusSubresource(&v1beta1.BatchRelease{}).
+		Build()
+}
+
+// newIntegrationUpdatedReplicaSet returns a ReplicaSet named
+// "<deployment name>-<updateRevision>" whose controller owner reference points
+// at deployment, labelled with updateRevision under
+// apps.DefaultDeploymentUniqueLabelKey, reporting replicas and readyReplicas.
+func newIntegrationUpdatedReplicaSet(deployment *apps.Deployment, updateRevision string, replicas, readyReplicas int32) *apps.ReplicaSet {
+	return &apps.ReplicaSet{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      deployment.Name + "-" + updateRevision,
+			Namespace: deployment.Namespace,
+			Labels: map[string]string{
+				apps.DefaultDeploymentUniqueLabelKey: updateRevision,
+			},
+			OwnerReferences: []metav1.OwnerReference{
+				{
+					APIVersion: apps.SchemeGroupVersion.String(),
+					Kind:       "Deployment",
+					Name:       deployment.Name,
+					UID:        deployment.UID,
+					Controller: pointer.Bool(true),
+				},
+			},
+		},
+		Spec: apps.ReplicaSetSpec{
+			Replicas: pointer.Int32(replicas),
+			Selector: deployment.Spec.Selector.DeepCopy(),
+			// Deep-copied like the selector so the ReplicaSet never shares label
+			// maps or container slices with the Deployment fixture.
+			Template: *deployment.Spec.Template.DeepCopy(),
+		},
+		Status: apps.ReplicaSetStatus{
+			Replicas:      replicas,
+			ReadyReplicas: readyReplicas,
+		},
+	}
+}
+
+// newIntegrationUpdatedPods returns total pods owned by rs and labelled with
+// updateRevision; the first ready of them have a Ready condition of True (set
+// 10 seconds in the past), the rest False. A non-empty rolloutID is added as
+// the v1beta1.RolloutIDLabel label. Names end in consecutive letters starting
+// at 'a', so total is expected to stay small.
+func newIntegrationUpdatedPods(deployment *apps.Deployment, rs *apps.ReplicaSet, updateRevision, rolloutID string, total, ready int) []*corev1.Pod {
+	pods := make([]*corev1.Pod, 0, total)
+	for i := 0; i < total; i++ {
+		readyCondition := corev1.ConditionFalse
+		if i < ready {
+			readyCondition = corev1.ConditionTrue
+		}
+		pod := &corev1.Pod{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      deployment.Name + "-pod-" + string(rune('a'+i)),
+				Namespace: deployment.Namespace,
+				Labels: map[string]string{
+					"app":                                "demo",
+					apps.DefaultDeploymentUniqueLabelKey: updateRevision,
+				},
+				OwnerReferences: []metav1.OwnerReference{{
+					APIVersion: apps.SchemeGroupVersion.String(),
+					Kind:       "ReplicaSet",
+					Name:       rs.Name,
+					UID:        rs.UID,
+					Controller: pointer.Bool(true),
+				}},
+			},
+			Status: corev1.PodStatus{Conditions: []corev1.PodCondition{{
+				Type:               corev1.PodReady,
+				Status:             readyCondition,
+				LastTransitionTime: metav1.NewTime(time.Now().Add(-10 * time.Second)),
+			}}},
+		}
+		if rolloutID != "" {
+			pod.Labels[v1beta1.RolloutIDLabel] = rolloutID
+		}
+		pods = append(pods, pod)
+	}
+	return pods
+}
+
+// appendIntegrationObjects appends pods to objects as client.Object values and
+// returns the extended slice.
+func appendIntegrationObjects(objects []client.Object, pods []*corev1.Pod) []client.Object {
+	for _, pod := range pods {
+		objects = append(objects, pod)
+	}
+	return objects
+}
+
+// newIntegrationMinReadyControl builds a partition-style control plane backed
+// by partitiondeployment.NewMinReadyController for the Deployment named
+// deploymentName in release's namespace. The result is narrowed to the four
+// lifecycle methods the tests call.
+func newIntegrationMinReadyControl(
+	cli client.Client,
+	recorder record.EventRecorder,
+	release *v1beta1.BatchRelease,
+	status *v1beta1.BatchReleaseStatus,
+	deploymentName string,
+) interface {
+	Initialize() error
+	UpgradeBatch() error
+	EnsureBatchPodsReadyAndLabeled() error
+	Finalize() error
+} {
+	return partitionstyle.NewControlPlane(
+		context.Background(),
+		partitiondeployment.NewMinReadyController,
+		cli,
+		recorder,
+		release,
+		status,
+		types.NamespacedName{Namespace: release.Namespace, Name: deploymentName},
+		apps.SchemeGroupVersion.WithKind("Deployment"),
+	)
+}
+
+// fetchIntegrationDeployment reads the current state of deployment back from
+// cli, failing the test if the Get call errors.
+func fetchIntegrationDeployment(t *testing.T, cli client.Client, deployment *apps.Deployment) *apps.Deployment {
+	t.Helper()
+	got := &apps.Deployment{}
+	key := types.NamespacedName{Namespace: deployment.Namespace, Name: deployment.Name}
+	if err := cli.Get(context.Background(), key, got); err != nil {
+		t.Fatalf("Get deployment failed: %v", err)
+	}
+	return got
+}
+
+// assertInflatedDeployment fails the test unless deployment uses the
+// RollingUpdate strategy with the inflated minReadySeconds and an integer
+// maxUnavailable of 0.
+func assertInflatedDeployment(t *testing.T, deployment *apps.Deployment) {
+	t.Helper()
+	if deployment.Spec.Strategy.Type != apps.RollingUpdateDeploymentStrategyType {
+		t.Fatalf("strategy.type = %q, want RollingUpdate", deployment.Spec.Strategy.Type)
+	}
+	if deployment.Spec.MinReadySeconds != partitiondeployment.InflatedMinReadySeconds {
+		t.Fatalf("minReadySeconds = %d, want %d", deployment.Spec.MinReadySeconds, partitiondeployment.InflatedMinReadySeconds)
+	}
+	rollingUpdate := deployment.Spec.Strategy.RollingUpdate
+	if rollingUpdate == nil {
+		t.Fatalf("strategy.rollingUpdate is nil, want maxUnavailable 0")
+	}
+	// A percentage such as "25%" also has IntVal 0, so the type must be checked
+	// to tell a real 0 from an un-inflated string value.
+	unavailable := rollingUpdate.MaxUnavailable
+	if unavailable == nil || unavailable.Type != intstr.Int || unavailable.IntVal != 0 {
+		t.Fatalf("maxUnavailable = %v, want 0", unavailable)
+	}
+}
+
+// assertOriginalAnnotation fails the test unless deployment carries annotation
+// key with exactly the value want.
+func assertOriginalAnnotation(t *testing.T, deployment *apps.Deployment, key, want string) {
+	t.Helper()
+	if got := deployment.Annotations[key]; got != want {
+		t.Fatalf("annotation %s = %q, want %q", key, got, want)
+	}
+}
+
+// assertIntegrationCondition fails the test unless status holds a condition
+// matching condType, condStatus and reason all at once.
+func assertIntegrationCondition(
+	t *testing.T,
+	status *v1beta1.BatchReleaseStatus,
+	condType v1beta1.RolloutConditionType,
+	condStatus corev1.ConditionStatus,
+	reason string,
+) {
+	t.Helper()
+	for _, condition := range status.Conditions {
+		if condition.Type == condType && condition.Status == condStatus && condition.Reason == reason {
+			return
+		}
+	}
+	t.Fatalf("condition %s with %s/%s not found in %#v", condType, condStatus, reason, status.Conditions)
+}
+
+// assertIntegrationEvent drains the events already buffered in recorder until
+// one contains want, and fails the test as soon as the buffer is empty. It
+// never blocks, and the events it reads are consumed.
+func assertIntegrationEvent(t *testing.T, recorder *record.FakeRecorder, want string) {
+	t.Helper()
+	for {
+		select {
+		case event := <-recorder.Events:
+			if strings.Contains(event, want) {
+				return
+			}
+		default:
+			t.Fatalf("event %q not recorded", want)
+		}
+	}
+}