open-cluster-management-io
diff --git a/‎addon/v1alpha1/0000_00_addon.open-cluster-management.io_clustermanagementaddons.crd.yaml
Lines changed: 63 additions & 75 deletions b/‎addon/v1alpha1/0000_00_addon.open-cluster-management.io_clustermanagementaddons.crd.yaml
Lines changed: 63 additions & 75 deletions
diff --git a/‎cluster/v1alpha1/helpers.go
Lines changed: 10 additions & 2 deletions b/‎cluster/v1alpha1/helpers.go
Lines changed: 10 additions & 2 deletions
diff --git a/‎cluster/v1alpha1/types_rolloutstrategy.go
Lines changed: 6 additions & 12 deletions b/‎cluster/v1alpha1/types_rolloutstrategy.go
Lines changed: 6 additions & 12 deletions
diff --git a/‎cluster/v1alpha1/zz_generated.deepcopy.go
Lines changed: 6 additions & 0 deletions b/‎cluster/v1alpha1/zz_generated.deepcopy.go
Lines changed: 6 additions & 0 deletions
@@ -155,15 +155,19 @@ spec:
                                   default: 0
                                   description: MaxFailures is a percentage or number
                                     of clusters in the current rollout that can fail
-                                    before proceeding to the next rollout. MaxFailures
-                                    is only considered for rollout types Progressive
-                                    and ProgressivePerGroup. For Progressive, this
-                                    is considered over the total number of clusters.
-                                    For ProgressivePerGroup, this is considered according
-                                    to the size of the current group. For both Progressive
-                                    and ProgressivePerGroup, the MaxFailures does
-                                    not apply for MandatoryDecisionGroups, which tolerate
-                                    no failures. Default is that no failures are tolerated.
+                                    before proceeding to the next rollout. Fail means
+                                    the cluster has a failed status or timeout status
+                                    (does not reach successful status after ProgressDeadline).
+                                    Once the MaxFailures is breached, the rollout
+                                    will stop. MaxFailures is only considered for
+                                    rollout types Progressive and ProgressivePerGroup.
+                                    For Progressive, this is considered over the total
+                                    number of clusters. For ProgressivePerGroup, this
+                                    is considered according to the size of the current
+                                    group. For both Progressive and ProgressivePerGroup,
+                                    the MaxFailures does not apply for MandatoryDecisionGroups,
+                                    which tolerate no failures. Default is that no
+                                    failures are tolerated.
                                   pattern: ^((100|[0-9]{1,2})%|[0-9]+)$
                                   x-kubernetes-int-or-string: true
                                 minSuccessTime:
@@ -185,26 +189,18 @@ spec:
                                   default: None
                                   description: ProgressDeadline defines how long workload
                                     applier controller will wait for the workload
-                                    to reach a successful state in the cluster. ProgressDeadline
-                                    default value is "None", meaning the workload
-                                    applier will wait for a successful state indefinitely.
+                                    to reach a successful state in the cluster. If
+                                    the workload does not reach a successful state
+                                    after ProgressDeadline, will stop waiting and
+                                    workload will be treated as "timeout" and be counted
+                                    into MaxFailures. Once the MaxFailures is breached,
+                                    the rollout will stop. ProgressDeadline default
+                                    value is "None", meaning the workload applier
+                                    will wait for a successful state indefinitely.
                                     ProgressDeadline must be defined in [0-9h]|[0-9m]|[0-9s]
                                     format examples; 2h , 90m , 360s
                                   pattern: ^(([0-9])+[h|m|s])|None$
                                   type: string
-                                timeout:
-                                  default: None
-                                  description: "Timeout defines how long the workload
-                                    applier controller will wait until the workload
-                                    reaches a successful state in the cluster. Timeout
-                                    default value is None meaning the workload applier
-                                    will not proceed apply workload to other clusters
-                                    if did not reach the successful state. Timeout
-                                    must be defined in [0-9h]|[0-9m]|[0-9s] format
-                                    examples; 2h , 90m , 360s \n Deprecated: Use ProgressDeadline
-                                    instead."
-                                  pattern: ^(([0-9])+[h|m|s])|None$
-                                  type: string
                               type: object
                             progressive:
                               description: Progressive defines required fields for
@@ -252,15 +248,19 @@ spec:
                                   default: 0
                                   description: MaxFailures is a percentage or number
                                     of clusters in the current rollout that can fail
-                                    before proceeding to the next rollout. MaxFailures
-                                    is only considered for rollout types Progressive
-                                    and ProgressivePerGroup. For Progressive, this
-                                    is considered over the total number of clusters.
-                                    For ProgressivePerGroup, this is considered according
-                                    to the size of the current group. For both Progressive
-                                    and ProgressivePerGroup, the MaxFailures does
-                                    not apply for MandatoryDecisionGroups, which tolerate
-                                    no failures. Default is that no failures are tolerated.
+                                    before proceeding to the next rollout. Fail means
+                                    the cluster has a failed status or timeout status
+                                    (does not reach successful status after ProgressDeadline).
+                                    Once the MaxFailures is breached, the rollout
+                                    will stop. MaxFailures is only considered for
+                                    rollout types Progressive and ProgressivePerGroup.
+                                    For Progressive, this is considered over the total
+                                    number of clusters. For ProgressivePerGroup, this
+                                    is considered according to the size of the current
+                                    group. For both Progressive and ProgressivePerGroup,
+                                    the MaxFailures does not apply for MandatoryDecisionGroups,
+                                    which tolerate no failures. Default is that no
+                                    failures are tolerated.
                                   pattern: ^((100|[0-9]{1,2})%|[0-9]+)$
                                   x-kubernetes-int-or-string: true
                                 minSuccessTime:
@@ -282,26 +282,18 @@ spec:
                                   default: None
                                   description: ProgressDeadline defines how long workload
                                     applier controller will wait for the workload
-                                    to reach a successful state in the cluster. ProgressDeadline
-                                    default value is "None", meaning the workload
-                                    applier will wait for a successful state indefinitely.
+                                    to reach a successful state in the cluster. If
+                                    the workload does not reach a successful state
+                                    after ProgressDeadline, will stop waiting and
+                                    workload will be treated as "timeout" and be counted
+                                    into MaxFailures. Once the MaxFailures is breached,
+                                    the rollout will stop. ProgressDeadline default
+                                    value is "None", meaning the workload applier
+                                    will wait for a successful state indefinitely.
                                     ProgressDeadline must be defined in [0-9h]|[0-9m]|[0-9s]
                                     format examples; 2h , 90m , 360s
                                   pattern: ^(([0-9])+[h|m|s])|None$
                                   type: string
-                                timeout:
-                                  default: None
-                                  description: "Timeout defines how long the workload
-                                    applier controller will wait until the workload
-                                    reaches a successful state in the cluster. Timeout
-                                    default value is None meaning the workload applier
-                                    will not proceed apply workload to other clusters
-                                    if did not reach the successful state. Timeout
-                                    must be defined in [0-9h]|[0-9m]|[0-9s] format
-                                    examples; 2h , 90m , 360s \n Deprecated: Use ProgressDeadline
-                                    instead."
-                                  pattern: ^(([0-9])+[h|m|s])|None$
-                                  type: string
                               type: object
                             progressivePerGroup:
                               description: ProgressivePerGroup defines required fields
@@ -338,15 +330,19 @@ spec:
                                   default: 0
                                   description: MaxFailures is a percentage or number
                                     of clusters in the current rollout that can fail
-                                    before proceeding to the next rollout. MaxFailures
-                                    is only considered for rollout types Progressive
-                                    and ProgressivePerGroup. For Progressive, this
-                                    is considered over the total number of clusters.
-                                    For ProgressivePerGroup, this is considered according
-                                    to the size of the current group. For both Progressive
-                                    and ProgressivePerGroup, the MaxFailures does
-                                    not apply for MandatoryDecisionGroups, which tolerate
-                                    no failures. Default is that no failures are tolerated.
+                                    before proceeding to the next rollout. Fail means
+                                    the cluster has a failed status or timeout status
+                                    (does not reach successful status after ProgressDeadline).
+                                    Once the MaxFailures is breached, the rollout
+                                    will stop. MaxFailures is only considered for
+                                    rollout types Progressive and ProgressivePerGroup.
+                                    For Progressive, this is considered over the total
+                                    number of clusters. For ProgressivePerGroup, this
+                                    is considered according to the size of the current
+                                    group. For both Progressive and ProgressivePerGroup,
+                                    the MaxFailures does not apply for MandatoryDecisionGroups,
+                                    which tolerate no failures. Default is that no
+                                    failures are tolerated.
                                   pattern: ^((100|[0-9]{1,2})%|[0-9]+)$
                                   x-kubernetes-int-or-string: true
                                 minSuccessTime:
@@ -368,26 +364,18 @@ spec:
                                   default: None
                                   description: ProgressDeadline defines how long workload
                                     applier controller will wait for the workload
-                                    to reach a successful state in the cluster. ProgressDeadline
-                                    default value is "None", meaning the workload
-                                    applier will wait for a successful state indefinitely.
+                                    to reach a successful state in the cluster. If
+                                    the workload does not reach a successful state
+                                    after ProgressDeadline, will stop waiting and
+                                    workload will be treated as "timeout" and be counted
+                                    into MaxFailures. Once the MaxFailures is breached,
+                                    the rollout will stop. ProgressDeadline default
+                                    value is "None", meaning the workload applier
+                                    will wait for a successful state indefinitely.
                                     ProgressDeadline must be defined in [0-9h]|[0-9m]|[0-9s]
                                     format examples; 2h , 90m , 360s
                                   pattern: ^(([0-9])+[h|m|s])|None$
                                   type: string
-                                timeout:
-                                  default: None
-                                  description: "Timeout defines how long the workload
-                                    applier controller will wait until the workload
-                                    reaches a successful state in the cluster. Timeout
-                                    default value is None meaning the workload applier
-                                    will not proceed apply workload to other clusters
-                                    if did not reach the successful state. Timeout
-                                    must be defined in [0-9h]|[0-9m]|[0-9s] format
-                                    examples; 2h , 90m , 360s \n Deprecated: Use ProgressDeadline
-                                    instead."
-                                  pattern: ^(([0-9])+[h|m|s])|None$
-                                  type: string
                               type: object
                             type:
                               default: All
 
@@ -283,7 +283,7 @@ func progressivePerCluster(
 		return existingClusterStatus[i].ClusterName < existingClusterStatus[j].ClusterName
 	})
 
-	// Collect current cluster status and determine any TimeOut statuses
+	// Collect existing cluster status and determine any TimeOut statuses
 	for _, status := range existingClusterStatus {
 		if status.ClusterName == "" {
 			continue
@@ -293,6 +293,10 @@ func progressivePerCluster(
 
 		// If there was a breach of MaxFailures, only handle clusters that have already had workload applied
 		if !failureBreach || failureBreach && status.Status != ToApply {
+			// For progress per cluster, the length of existing `rolloutClusters` will be compared with the
+			// target rollout size to determine whether to return or not first.
+			// The timeoutClusters, as well as failed clusters will be counted into failureCount, the next rollout
+			// will stop if failureCount > maxFailures.
 			rolloutClusters, timeoutClusters = determineRolloutStatus(&status, minSuccessTime, timeout, rolloutClusters, timeoutClusters)
 		}
 
@@ -303,7 +307,7 @@ func progressivePerCluster(
 			failureBreach = failureCount > maxFailures
 		}
 
-		// Return if the list of rollout clusters has reached the target rollout size
+		// Return if the list of existing rollout clusters has reached the target rollout size
 		if len(rolloutClusters) >= rolloutSize {
 			return RolloutResult{
 				ClustersToRollout: rolloutClusters,
@@ -314,6 +318,7 @@ func progressivePerCluster(
 		}
 	}
 
+	// Return if the existing rollout clusters have breached maxFailures.
 	if failureBreach {
 		return RolloutResult{
 			ClustersToRollout: rolloutClusters,
@@ -370,13 +375,16 @@ func progressivePerGroup(
 	var rolloutClusters, timeoutClusters []ClusterRolloutStatus
 	existingClusters := make(map[string]RolloutStatus)
 
+	// Collect existing cluster status and determine any TimeOut statuses
 	for _, status := range existingClusterStatus {
 		if status.ClusterName == "" {
 			continue
 		}
 
 		// ToApply will be reconsidered in the decisionGroups iteration.
 		if status.Status != ToApply {
+			// For progress per group, the statuses of existing rollout clusters and timeout clusters are recorded in existingClusters first,
+			// and then the clusters are gone through group by group.
 			rolloutClusters, timeoutClusters = determineRolloutStatus(&status, minSuccessTime, timeout, rolloutClusters, timeoutClusters)
 			existingClusters[status.ClusterName] = status.Status
 		}
 
@@ -64,6 +64,9 @@ type RolloutConfig struct {
 	MinSuccessTime metav1.Duration `json:"minSuccessTime,omitempty"`
 	// ProgressDeadline defines how long workload applier controller will wait for the workload to
 	// reach a successful state in the cluster.
+	// If the workload does not reach a successful state after ProgressDeadline, will stop waiting
+	// and workload will be treated as "timeout" and be counted into MaxFailures. Once the MaxFailures
+	// is breached, the rollout will stop.
 	// ProgressDeadline default value is "None", meaning the workload applier will wait for a
 	// successful state indefinitely.
 	// ProgressDeadline must be defined in [0-9h]|[0-9m]|[0-9s] format examples; 2h , 90m , 360s
@@ -72,7 +75,9 @@ type RolloutConfig struct {
 	// +optional
 	ProgressDeadline string `json:"progressDeadline,omitempty"`
 	// MaxFailures is a percentage or number of clusters in the current rollout that can fail before
-	// proceeding to the next rollout.
+	// proceeding to the next rollout. Fail means the cluster has a failed status or timeout status
+	// (does not reach successful status after ProgressDeadline).
+	// Once the MaxFailures is breached, the rollout will stop.
 	// MaxFailures is only considered for rollout types Progressive and ProgressivePerGroup. For
 	// Progressive, this is considered over the total number of clusters. For ProgressivePerGroup,
 	// this is considered according to the size of the current group. For both Progressive and
@@ -84,17 +89,6 @@ type RolloutConfig struct {
 	// +kubebuilder:default=0
 	// +optional
 	MaxFailures intstr.IntOrString `json:"maxFailures,omitempty"`
-	// Timeout defines how long the workload applier controller will wait until the workload reaches a
-	// successful state in the cluster.
-	// Timeout default value is None meaning the workload applier will not proceed apply workload to
-	// other clusters if did not reach the successful state.
-	// Timeout must be defined in [0-9h]|[0-9m]|[0-9s] format examples; 2h , 90m , 360s
-	//
-	// Deprecated: Use ProgressDeadline instead.
-	// +kubebuilder:validation:Pattern="^(([0-9])+[h|m|s])|None$"
-	// +kubebuilder:default:="None"
-	// +optional
-	Timeout string `json:"timeout,omitempty"`
 }
 
 // MandatoryDecisionGroup set the decision group name or group index.