diff --git a/internal/controllers/machinedeployment/machinedeployment_controller.go b/internal/controllers/machinedeployment/machinedeployment_controller.go index 7aa88f4968d1..84f737d6f415 100644 --- a/internal/controllers/machinedeployment/machinedeployment_controller.go +++ b/internal/controllers/machinedeployment/machinedeployment_controller.go @@ -309,18 +309,17 @@ func (r *Reconciler) reconcile(ctx context.Context, s *scope) error { return errors.Errorf("missing MachineDeployment strategy") } - if md.Spec.Strategy.Type == clusterv1.RollingUpdateMachineDeploymentStrategyType { + switch md.Spec.Strategy.Type { + case clusterv1.RollingUpdateMachineDeploymentStrategyType: if md.Spec.Strategy.RollingUpdate == nil { return errors.Errorf("missing MachineDeployment settings for strategy type: %s", md.Spec.Strategy.Type) } return r.rolloutRolling(ctx, md, s.machineSets, templateExists) - } - - if md.Spec.Strategy.Type == clusterv1.OnDeleteMachineDeploymentStrategyType { + case clusterv1.OnDeleteMachineDeploymentStrategyType: return r.rolloutOnDelete(ctx, md, s.machineSets, templateExists) + default: + return errors.Errorf("unexpected deployment strategy type: %s", md.Spec.Strategy.Type) } - - return errors.Errorf("unexpected deployment strategy type: %s", md.Spec.Strategy.Type) } func (r *Reconciler) reconcileDelete(ctx context.Context, s *scope) error { diff --git a/internal/controllers/machinedeployment/machinedeployment_rolling.go b/internal/controllers/machinedeployment/machinedeployment_rolling.go index 2d3db1ceb7c0..b433ab8d21a2 100644 --- a/internal/controllers/machinedeployment/machinedeployment_rolling.go +++ b/internal/controllers/machinedeployment/machinedeployment_rolling.go @@ -179,6 +179,10 @@ func (r *Reconciler) reconcileOldMachineSets(ctx context.Context, allMSs []*clus log.V(4).Info("Cleaned up unhealthy replicas from old MachineSets", "count", cleanupCount) + if err := r.propagateDeletionTimeoutsToOldMachineSet(ctx, oldMSs, deployment); err != nil { + return err + } + // Scale down old MachineSets, need check maxUnavailable to ensure we can scale down allMSs = oldMSs allMSs = append(allMSs, newMS) diff --git a/internal/controllers/machinedeployment/machinedeployment_rolling_test.go b/internal/controllers/machinedeployment/machinedeployment_rolling_test.go index 696efda400ad..dd702e344fea 100644 --- a/internal/controllers/machinedeployment/machinedeployment_rolling_test.go +++ b/internal/controllers/machinedeployment/machinedeployment_rolling_test.go @@ -19,6 +19,7 @@ package machinedeployment import ( "strconv" "testing" + "time" . "github.com/onsi/gomega" "github.com/pkg/errors" @@ -302,6 +303,7 @@ func TestReconcileNewMachineSet(t *testing.T) { } func TestReconcileOldMachineSets(t *testing.T) { + duration10m := &metav1.Duration{Duration: 10 * time.Minute} testCases := []struct { name string machineDeployment *clusterv1.MachineDeployment @@ -468,6 +470,83 @@ func TestReconcileOldMachineSets(t *testing.T) { }, expectedOldMachineSetsReplicas: 8, }, + { + name: "RollingUpdate strategy: Scale down old MachineSets and propagate the node deletion related timeouts to old MachineSets", + machineDeployment: &clusterv1.MachineDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "foo", + Name: "bar", + }, + Spec: clusterv1.MachineDeploymentSpec{ + Strategy: &clusterv1.MachineDeploymentStrategy{ + Type: clusterv1.RollingUpdateMachineDeploymentStrategyType, + RollingUpdate: &clusterv1.MachineRollingUpdateDeployment{ + MaxUnavailable: intOrStrPtr(1), + MaxSurge: intOrStrPtr(3), + }, + }, + Replicas: ptr.To[int32](2), + Template: clusterv1.MachineTemplateSpec{ + Spec: clusterv1.MachineSpec{ + NodeDrainTimeout: duration10m, + NodeDeletionTimeout: duration10m, + NodeVolumeDetachTimeout: duration10m, + }, + }, + }, + }, + newMachineSet: &clusterv1.MachineSet{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "foo", + Name: "bar", + }, + Spec: clusterv1.MachineSetSpec{ + Replicas: ptr.To[int32](0), + }, + Status: clusterv1.MachineSetStatus{ + Deprecated: &clusterv1.MachineSetDeprecatedStatus{ + V1Beta1: &clusterv1.MachineSetV1Beta1DeprecatedStatus{ + AvailableReplicas: 2, + }, + }, + }, + }, + oldMachineSets: []*clusterv1.MachineSet{ + { + ObjectMeta: metav1.ObjectMeta{ + Namespace: "foo", + Name: "2replicas", + }, + Spec: clusterv1.MachineSetSpec{ + Replicas: ptr.To[int32](2), + }, + Status: clusterv1.MachineSetStatus{ + Deprecated: &clusterv1.MachineSetDeprecatedStatus{ + V1Beta1: &clusterv1.MachineSetV1Beta1DeprecatedStatus{ + AvailableReplicas: 2, + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Namespace: "foo", + Name: "1replicas", + }, + Spec: clusterv1.MachineSetSpec{ + Replicas: ptr.To[int32](1), + }, + Status: clusterv1.MachineSetStatus{ + Deprecated: &clusterv1.MachineSetDeprecatedStatus{ + V1Beta1: &clusterv1.MachineSetV1Beta1DeprecatedStatus{ + AvailableReplicas: 1, + }, + }, + }, + }, + }, + expectedOldMachineSetsReplicas: 0, + }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { @@ -500,6 +579,9 @@ func TestReconcileOldMachineSets(t *testing.T) { err = r.Client.Get(ctx, client.ObjectKeyFromObject(tc.oldMachineSets[key]), freshOldMachineSet) g.Expect(err).ToNot(HaveOccurred()) g.Expect(*freshOldMachineSet.Spec.Replicas).To(BeEquivalentTo(tc.expectedOldMachineSetsReplicas)) + g.Expect(freshOldMachineSet.Spec.Template.Spec.NodeDrainTimeout).To(BeEquivalentTo(tc.machineDeployment.Spec.Template.Spec.NodeDrainTimeout)) + g.Expect(freshOldMachineSet.Spec.Template.Spec.NodeDeletionTimeout).To(BeEquivalentTo(tc.machineDeployment.Spec.Template.Spec.NodeDeletionTimeout)) + g.Expect(freshOldMachineSet.Spec.Template.Spec.NodeVolumeDetachTimeout).To(BeEquivalentTo(tc.machineDeployment.Spec.Template.Spec.NodeVolumeDetachTimeout)) } }) } diff --git a/internal/controllers/machinedeployment/machinedeployment_rollout_ondelete.go b/internal/controllers/machinedeployment/machinedeployment_rollout_ondelete.go index 73f770f76c0c..ad99a606c6d6 100644 --- a/internal/controllers/machinedeployment/machinedeployment_rollout_ondelete.go +++ b/internal/controllers/machinedeployment/machinedeployment_rollout_ondelete.go @@ -104,6 +104,9 @@ func (r *Reconciler) reconcileOldMachineSetsOnDelete(ctx context.Context, oldMSs return err } } + if err := r.propagateDeletionTimeoutsToOldMachineSet(ctx, oldMSs, deployment); err != nil { + return err + } selectorMap, err := metav1.LabelSelectorAsMap(&oldMS.Spec.Selector) if err != nil { log.V(4).Info("Failed to convert MachineSet label selector to a map", "err", err) diff --git a/internal/controllers/machinedeployment/machinedeployment_sync.go b/internal/controllers/machinedeployment/machinedeployment_sync.go index c6bebe9d12f7..541ea3f04860 100644 --- a/internal/controllers/machinedeployment/machinedeployment_sync.go +++ b/internal/controllers/machinedeployment/machinedeployment_sync.go @@ -669,3 +669,23 @@ func (r *Reconciler) cleanupDeployment(ctx context.Context, oldMSs []*clusterv1. return nil } + +func (r *Reconciler) propagateDeletionTimeoutsToOldMachineSet(ctx context.Context, oldMSs []*clusterv1.MachineSet, deployment *clusterv1.MachineDeployment) error { + for _, oldMS := range oldMSs { + patchHelper, err := patch.NewHelper(oldMS, r.Client) + if err != nil { + return errors.Wrapf(err, "failed to generate patch for MachineSet %q", klog.KObj(oldMS)) + } + + // Set all other in-place mutable fields that impact the ability to tear down existing machines. + oldMS.Spec.Template.Spec.NodeDrainTimeout = deployment.Spec.Template.Spec.NodeDrainTimeout + oldMS.Spec.Template.Spec.NodeDeletionTimeout = deployment.Spec.Template.Spec.NodeDeletionTimeout + oldMS.Spec.Template.Spec.NodeVolumeDetachTimeout = deployment.Spec.Template.Spec.NodeVolumeDetachTimeout + + err = patchHelper.Patch(ctx, oldMS) + if err != nil { + return errors.Wrapf(err, "failed to update MachineSet %q", klog.KObj(oldMS)) + } + } + return nil +}