mirror of https://github.com/fluxcd/flagger.git
[feat] primaryReadyThreshold: allow configuring threshold for primary
see #639 Signed-off-by: Mahdi Dibaiee <mdibaiee@pm.me>
This commit is contained in:
parent
9c7db58d87
commit
8f50521435
|
|
@ -826,6 +826,9 @@ spec:
|
|||
mirrorWeight:
|
||||
description: Weight of traffic to be mirrored
|
||||
type: number
|
||||
primaryReadyThreshold:
|
||||
description: Percentage of pods that need to be available to consider primary as ready
|
||||
type: number
|
||||
match:
|
||||
description: A/B testing match conditions
|
||||
type: array
|
||||
|
|
|
|||
|
|
@ -826,6 +826,9 @@ spec:
|
|||
mirrorWeight:
|
||||
description: Weight of traffic to be mirrored
|
||||
type: number
|
||||
primaryReadyThreshold:
|
||||
description: Percentage of pods that need to be available to consider primary as ready
|
||||
type: number
|
||||
match:
|
||||
description: A/B testing match conditions
|
||||
type: array
|
||||
|
|
|
|||
|
|
@ -335,6 +335,10 @@ Spec:
|
|||
# total number of iterations
|
||||
# used for A/B Testing and Blue/Green
|
||||
iterations:
|
||||
# threshold of primary pods that need to be available to consider it ready
|
||||
# before starting rollout. this is optional and the default is 100
|
||||
# percentage (0-100)
|
||||
primaryReadyThreshold: 100
|
||||
# canary match conditions
|
||||
# used for A/B Testing
|
||||
match:
|
||||
|
|
|
|||
|
|
@ -826,6 +826,9 @@ spec:
|
|||
mirrorWeight:
|
||||
description: Weight of traffic to be mirrored
|
||||
type: number
|
||||
primaryReadyThreshold:
|
||||
description: Percentage of pods that need to be available to consider primary as ready
|
||||
type: number
|
||||
match:
|
||||
description: A/B testing match conditions
|
||||
type: array
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ const (
|
|||
CanaryKind = "Canary"
|
||||
ProgressDeadlineSeconds = 600
|
||||
AnalysisInterval = 60 * time.Second
|
||||
PrimaryReadyThreshold = 100
|
||||
MetricInterval = "1m"
|
||||
)
|
||||
|
||||
|
|
@ -229,6 +230,9 @@ type CanaryAnalysis struct {
|
|||
// Max number of failed checks before the canary is terminated
|
||||
Threshold int `json:"threshold"`
|
||||
|
||||
// Percentage of pods that need to be available to consider primary as ready
|
||||
PrimaryReadyThreshold *int `json:"primaryReadyThreshold,omitempty"`
|
||||
|
||||
// Alert list for this canary analysis
|
||||
Alerts []CanaryAlert `json:"alerts,omitempty"`
|
||||
|
||||
|
|
@ -440,6 +444,14 @@ func (c *Canary) GetAnalysisThreshold() int {
|
|||
return 1
|
||||
}
|
||||
|
||||
// GetAnalysisPrimaryReadyThreshold returns the canary primaryReadyThreshold (default 100)
|
||||
func (c *Canary) GetAnalysisPrimaryReadyThreshold() int {
|
||||
if c.GetAnalysis().PrimaryReadyThreshold != nil {
|
||||
return *c.GetAnalysis().PrimaryReadyThreshold
|
||||
}
|
||||
return PrimaryReadyThreshold
|
||||
}
|
||||
|
||||
// GetMetricInterval returns the metric interval default value (1m)
|
||||
func (c *Canary) GetMetricInterval() string {
|
||||
return MetricInterval
|
||||
|
|
|
|||
|
|
@ -202,6 +202,11 @@ func (in *CanaryAnalysis) DeepCopyInto(out *CanaryAnalysis) {
|
|||
*out = make([]int, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
if in.PrimaryReadyThreshold != nil {
|
||||
in, out := &in.PrimaryReadyThreshold, &out.PrimaryReadyThreshold
|
||||
*out = new(int)
|
||||
**out = **in
|
||||
}
|
||||
if in.Alerts != nil {
|
||||
in, out := &in.Alerts, &out.Alerts
|
||||
*out = make([]CanaryAlert, len(*in))
|
||||
|
|
|
|||
|
|
@ -357,6 +357,7 @@ func newDaemonSetControllerTestCanary(dc daemonsetConfigs) *flaggerv1.Canary {
|
|||
APIVersion: "apps/v1",
|
||||
Kind: "DaemonSet",
|
||||
},
|
||||
Analysis: &flaggerv1.CanaryAnalysis{},
|
||||
},
|
||||
}
|
||||
return cd
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ func (c *DaemonSetController) IsPrimaryReady(cd *flaggerv1.Canary) error {
|
|||
return fmt.Errorf("daemonset %s.%s get query error: %w", primaryName, cd.Namespace, err)
|
||||
}
|
||||
|
||||
_, err = c.isDaemonSetReady(cd, primary)
|
||||
_, err = c.isDaemonSetReady(cd, primary, cd.GetAnalysisPrimaryReadyThreshold())
|
||||
if err != nil {
|
||||
return fmt.Errorf("primary daemonset %s.%s not ready: %w", primaryName, cd.Namespace, err)
|
||||
}
|
||||
|
|
@ -52,7 +52,7 @@ func (c *DaemonSetController) IsCanaryReady(cd *flaggerv1.Canary) (bool, error)
|
|||
return true, fmt.Errorf("daemonset %s.%s get query error: %w", targetName, cd.Namespace, err)
|
||||
}
|
||||
|
||||
retryable, err := c.isDaemonSetReady(cd, canary)
|
||||
retryable, err := c.isDaemonSetReady(cd, canary, 100)
|
||||
if err != nil {
|
||||
return retryable, fmt.Errorf("canary damonset %s.%s not ready with retryable %v: %w",
|
||||
targetName, cd.Namespace, retryable, err)
|
||||
|
|
@ -62,11 +62,14 @@ func (c *DaemonSetController) IsCanaryReady(cd *flaggerv1.Canary) (bool, error)
|
|||
|
||||
// isDaemonSetReady determines if a daemonset is ready by checking the number of old version daemons
|
||||
// reference: https://github.com/kubernetes/kubernetes/blob/5232ad4a00ec93942d0b2c6359ee6cd1201b46bc/pkg/kubectl/rollout_status.go#L110
|
||||
func (c *DaemonSetController) isDaemonSetReady(cd *flaggerv1.Canary, daemonSet *appsv1.DaemonSet) (bool, error) {
|
||||
func (c *DaemonSetController) isDaemonSetReady(cd *flaggerv1.Canary, daemonSet *appsv1.DaemonSet, readyThreshold int) (bool, error) {
|
||||
if daemonSet.Generation <= daemonSet.Status.ObservedGeneration {
|
||||
readyThresholdRatio := float32(readyThreshold) / float32(100)
|
||||
|
||||
// calculate conditions
|
||||
newCond := daemonSet.Status.UpdatedNumberScheduled < daemonSet.Status.DesiredNumberScheduled
|
||||
availableCond := daemonSet.Status.NumberAvailable < daemonSet.Status.DesiredNumberScheduled
|
||||
readyThresholdDesiredReplicas := int32(float32(daemonSet.Status.DesiredNumberScheduled) * readyThresholdRatio)
|
||||
availableCond := daemonSet.Status.NumberAvailable < readyThresholdDesiredReplicas
|
||||
if !newCond && !availableCond {
|
||||
return true, nil
|
||||
}
|
||||
|
|
@ -83,8 +86,8 @@ func (c *DaemonSetController) isDaemonSetReady(cd *flaggerv1.Canary, daemonSet *
|
|||
return true, fmt.Errorf("waiting for rollout to finish: %d out of %d new pods have been updated",
|
||||
daemonSet.Status.UpdatedNumberScheduled, daemonSet.Status.DesiredNumberScheduled)
|
||||
} else if availableCond {
|
||||
return true, fmt.Errorf("waiting for rollout to finish: %d of %d updated pods are available",
|
||||
daemonSet.Status.NumberAvailable, daemonSet.Status.DesiredNumberScheduled)
|
||||
return true, fmt.Errorf("waiting for rollout to finish: %d of %d (readyThreshold %d%%) updated pods are available",
|
||||
daemonSet.Status.NumberAvailable, readyThresholdDesiredReplicas, readyThreshold)
|
||||
}
|
||||
}
|
||||
return true, fmt.Errorf("waiting for rollout to finish: observed daemonset generation less than desired generation")
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ func TestDaemonSetController_isDaemonSetReady(t *testing.T) {
|
|||
// observed generation is less than desired generation
|
||||
ds := &appsv1.DaemonSet{Status: appsv1.DaemonSetStatus{}}
|
||||
ds.Status.ObservedGeneration--
|
||||
retryable, err := mocks.controller.isDaemonSetReady(cd, ds)
|
||||
retryable, err := mocks.controller.isDaemonSetReady(cd, ds, 100)
|
||||
require.Error(t, err)
|
||||
require.True(t, retryable)
|
||||
|
||||
|
|
@ -58,7 +58,7 @@ func TestDaemonSetController_isDaemonSetReady(t *testing.T) {
|
|||
DesiredNumberScheduled: 1,
|
||||
NumberAvailable: 1,
|
||||
}}
|
||||
retryable, err = mocks.controller.isDaemonSetReady(cd, ds)
|
||||
retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 100)
|
||||
require.NoError(t, err)
|
||||
require.True(t, retryable)
|
||||
|
||||
|
|
@ -69,7 +69,7 @@ func TestDaemonSetController_isDaemonSetReady(t *testing.T) {
|
|||
}}
|
||||
cd.Status.LastTransitionTime = metav1.Now()
|
||||
cd.Spec.ProgressDeadlineSeconds = int32p(-1e6)
|
||||
retryable, err = mocks.controller.isDaemonSetReady(cd, ds)
|
||||
retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 100)
|
||||
require.Error(t, err)
|
||||
require.False(t, retryable)
|
||||
|
||||
|
|
@ -80,7 +80,7 @@ func TestDaemonSetController_isDaemonSetReady(t *testing.T) {
|
|||
NumberAvailable: 1,
|
||||
}}
|
||||
cd.Spec.ProgressDeadlineSeconds = int32p(1e6)
|
||||
retryable, err = mocks.controller.isDaemonSetReady(cd, ds)
|
||||
retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 100)
|
||||
require.Error(t, err)
|
||||
require.True(t, retryable)
|
||||
require.True(t, strings.Contains(err.Error(), "new pods"))
|
||||
|
|
@ -91,7 +91,46 @@ func TestDaemonSetController_isDaemonSetReady(t *testing.T) {
|
|||
DesiredNumberScheduled: 1,
|
||||
NumberAvailable: 0,
|
||||
}}
|
||||
retryable, err = mocks.controller.isDaemonSetReady(cd, ds)
|
||||
retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 100)
|
||||
require.Error(t, err)
|
||||
require.True(t, retryable)
|
||||
require.True(t, strings.Contains(err.Error(), "available"))
|
||||
}
|
||||
|
||||
func TestDaemonSetController_isDaemonSetReady_readyThreshold(t *testing.T) {
|
||||
dc := daemonsetConfigs{name: "podinfo", label: "name", labelValue: "podinfo"}
|
||||
mocks := newDaemonSetFixture(dc)
|
||||
cd := &flaggerv1.Canary{}
|
||||
|
||||
// succeeded
|
||||
ds := &appsv1.DaemonSet{Status: appsv1.DaemonSetStatus{
|
||||
UpdatedNumberScheduled: 1,
|
||||
DesiredNumberScheduled: 1,
|
||||
NumberAvailable: 1,
|
||||
}}
|
||||
retryable, err := mocks.controller.isDaemonSetReady(cd, ds, 50)
|
||||
require.NoError(t, err)
|
||||
require.True(t, retryable)
|
||||
|
||||
// waiting for updated ones to be available, 50% available, ok
|
||||
ds = &appsv1.DaemonSet{Status: appsv1.DaemonSetStatus{
|
||||
UpdatedNumberScheduled: 4,
|
||||
DesiredNumberScheduled: 4,
|
||||
NumberAvailable: 2,
|
||||
}}
|
||||
retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 50)
|
||||
require.NoError(t, err)
|
||||
require.True(t, retryable)
|
||||
|
||||
// waiting for updated ones to be available, less than 50% available
|
||||
ds = &appsv1.DaemonSet{Status: appsv1.DaemonSetStatus{
|
||||
UpdatedNumberScheduled: 4,
|
||||
DesiredNumberScheduled: 4,
|
||||
NumberAvailable: 1,
|
||||
}}
|
||||
cd.Status.LastTransitionTime = metav1.Now()
|
||||
cd.Spec.ProgressDeadlineSeconds = int32p(1e6)
|
||||
retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 50)
|
||||
require.Error(t, err)
|
||||
require.True(t, retryable)
|
||||
require.True(t, strings.Contains(err.Error(), "available"))
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ func (c *DeploymentController) IsPrimaryReady(cd *flaggerv1.Canary) error {
|
|||
return fmt.Errorf("deployment %s.%s get query error: %w", primaryName, cd.Namespace, err)
|
||||
}
|
||||
|
||||
_, err = c.isDeploymentReady(primary, cd.GetProgressDeadlineSeconds())
|
||||
_, err = c.isDeploymentReady(primary, cd.GetProgressDeadlineSeconds(), cd.GetAnalysisPrimaryReadyThreshold())
|
||||
if err != nil {
|
||||
return fmt.Errorf("%s.%s not ready: %w", primaryName, cd.Namespace, err)
|
||||
}
|
||||
|
|
@ -59,7 +59,7 @@ func (c *DeploymentController) IsCanaryReady(cd *flaggerv1.Canary) (bool, error)
|
|||
return true, fmt.Errorf("deployment %s.%s get query error: %w", targetName, cd.Namespace, err)
|
||||
}
|
||||
|
||||
retryable, err := c.isDeploymentReady(canary, cd.GetProgressDeadlineSeconds())
|
||||
retryable, err := c.isDeploymentReady(canary, cd.GetProgressDeadlineSeconds(), 100)
|
||||
if err != nil {
|
||||
return retryable, fmt.Errorf(
|
||||
"canary deployment %s.%s not ready: %w",
|
||||
|
|
@ -71,7 +71,7 @@ func (c *DeploymentController) IsCanaryReady(cd *flaggerv1.Canary) (bool, error)
|
|||
|
||||
// isDeploymentReady determines if a deployment is ready by checking the status conditions
|
||||
// if a deployment has exceeded the progress deadline it returns a non retriable error
|
||||
func (c *DeploymentController) isDeploymentReady(deployment *appsv1.Deployment, deadline int) (bool, error) {
|
||||
func (c *DeploymentController) isDeploymentReady(deployment *appsv1.Deployment, deadline int, readyThreshold int) (bool, error) {
|
||||
retriable := true
|
||||
if deployment.Generation <= deployment.Status.ObservedGeneration {
|
||||
progress := c.getDeploymentCondition(deployment.Status, appsv1.DeploymentProgressing)
|
||||
|
|
@ -86,6 +86,9 @@ func (c *DeploymentController) isDeploymentReady(deployment *appsv1.Deployment,
|
|||
}
|
||||
}
|
||||
|
||||
readyThresholdRatio := float32(readyThreshold) / float32(100)
|
||||
readyThresholdUpdatedReplicas := int32(float32(deployment.Status.UpdatedReplicas) * readyThresholdRatio)
|
||||
|
||||
if progress != nil && progress.Reason == "ProgressDeadlineExceeded" {
|
||||
return false, fmt.Errorf("deployment %q exceeded its progress deadline", deployment.GetName())
|
||||
} else if deployment.Spec.Replicas != nil && deployment.Status.UpdatedReplicas < *deployment.Spec.Replicas {
|
||||
|
|
@ -94,9 +97,9 @@ func (c *DeploymentController) isDeploymentReady(deployment *appsv1.Deployment,
|
|||
} else if deployment.Status.Replicas > deployment.Status.UpdatedReplicas {
|
||||
return retriable, fmt.Errorf("waiting for rollout to finish: %d old replicas are pending termination",
|
||||
deployment.Status.Replicas-deployment.Status.UpdatedReplicas)
|
||||
} else if deployment.Status.AvailableReplicas < deployment.Status.UpdatedReplicas {
|
||||
return retriable, fmt.Errorf("waiting for rollout to finish: %d of %d updated replicas are available",
|
||||
deployment.Status.AvailableReplicas, deployment.Status.UpdatedReplicas)
|
||||
} else if deployment.Status.AvailableReplicas < readyThresholdUpdatedReplicas {
|
||||
return retriable, fmt.Errorf("waiting for rollout to finish: %d of %d (readyThreshold %d%%) updated replicas are available",
|
||||
deployment.Status.AvailableReplicas, readyThresholdUpdatedReplicas, readyThreshold)
|
||||
}
|
||||
} else {
|
||||
return true, fmt.Errorf(
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
|
|||
|
||||
// observed generation is less than desired generation
|
||||
dp := &appsv1.Deployment{Status: appsv1.DeploymentStatus{ObservedGeneration: -1}}
|
||||
retryable, err := mocks.controller.isDeploymentReady(dp, 0)
|
||||
retryable, err := mocks.controller.isDeploymentReady(dp, 0, 100)
|
||||
assert.Error(t, err)
|
||||
assert.True(t, retryable)
|
||||
assert.True(t, strings.Contains(err.Error(), "generation"))
|
||||
|
|
@ -57,7 +57,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
|
|||
ReadyReplicas: 1,
|
||||
AvailableReplicas: 1,
|
||||
}}
|
||||
retryable, err = mocks.controller.isDeploymentReady(dp, 0)
|
||||
retryable, err = mocks.controller.isDeploymentReady(dp, 0, 100)
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, retryable)
|
||||
|
||||
|
|
@ -67,7 +67,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
|
|||
}, Spec: appsv1.DeploymentSpec{
|
||||
Replicas: int32p(2),
|
||||
}}
|
||||
_, err = mocks.controller.isDeploymentReady(dp, 0)
|
||||
_, err = mocks.controller.isDeploymentReady(dp, 0, 100)
|
||||
assert.Error(t, err)
|
||||
assert.True(t, strings.Contains(err.Error(), "new replicas"))
|
||||
|
||||
|
|
@ -76,7 +76,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
|
|||
UpdatedReplicas: 1,
|
||||
Replicas: 2,
|
||||
}}
|
||||
_, err = mocks.controller.isDeploymentReady(dp, 0)
|
||||
_, err = mocks.controller.isDeploymentReady(dp, 0, 100)
|
||||
assert.Error(t, err)
|
||||
assert.True(t, strings.Contains(err.Error(), "termination"))
|
||||
|
||||
|
|
@ -85,7 +85,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
|
|||
UpdatedReplicas: 2,
|
||||
AvailableReplicas: 1,
|
||||
}}
|
||||
_, err = mocks.controller.isDeploymentReady(dp, 0)
|
||||
_, err = mocks.controller.isDeploymentReady(dp, 0, 100)
|
||||
assert.Error(t, err)
|
||||
assert.True(t, strings.Contains(err.Error(), "available"))
|
||||
|
||||
|
|
@ -94,7 +94,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
|
|||
Conditions: []appsv1.DeploymentCondition{
|
||||
{Type: appsv1.DeploymentProgressing, Reason: "ProgressDeadlineExceeded"}},
|
||||
}}
|
||||
retryable, err = mocks.controller.isDeploymentReady(dp, 0)
|
||||
retryable, err = mocks.controller.isDeploymentReady(dp, 0, 100)
|
||||
assert.Error(t, err)
|
||||
assert.False(t, retryable)
|
||||
assert.True(t, strings.Contains(err.Error(), "exceeded"))
|
||||
|
|
@ -113,7 +113,48 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
|
|||
UpdatedReplicas: 2,
|
||||
AvailableReplicas: 1,
|
||||
}}
|
||||
retryable, err = mocks.controller.isDeploymentReady(dp, 5)
|
||||
retryable, err = mocks.controller.isDeploymentReady(dp, 5, 100)
|
||||
assert.Error(t, err)
|
||||
assert.False(t, retryable)
|
||||
}
|
||||
|
||||
func TestDeploymentController_isDeploymentReady_readyThreshold(t *testing.T) {
|
||||
dc := deploymentConfigs{name: "podinfo", label: "name", labelValue: "podinfo"}
|
||||
mocks := newDeploymentFixture(dc)
|
||||
|
||||
// observed generation is less than desired generation
|
||||
dp := &appsv1.Deployment{Status: appsv1.DeploymentStatus{ObservedGeneration: -1}}
|
||||
retryable, err := mocks.controller.isDeploymentReady(dp, 0, 50)
|
||||
assert.Error(t, err)
|
||||
assert.True(t, retryable)
|
||||
assert.True(t, strings.Contains(err.Error(), "generation"))
|
||||
|
||||
// ok
|
||||
dp = &appsv1.Deployment{Status: appsv1.DeploymentStatus{
|
||||
Replicas: 1,
|
||||
UpdatedReplicas: 1,
|
||||
ReadyReplicas: 1,
|
||||
AvailableReplicas: 1,
|
||||
}}
|
||||
retryable, err = mocks.controller.isDeploymentReady(dp, 0, 50)
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, retryable)
|
||||
|
||||
// waiting for updated ones to be available, 50% is available, ok
|
||||
dp = &appsv1.Deployment{Status: appsv1.DeploymentStatus{
|
||||
UpdatedReplicas: 4,
|
||||
AvailableReplicas: 2,
|
||||
}}
|
||||
retryable, err = mocks.controller.isDeploymentReady(dp, 0, 50)
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, retryable)
|
||||
|
||||
// waiting for updated ones to be available, less than 50% available
|
||||
dp = &appsv1.Deployment{Status: appsv1.DeploymentStatus{
|
||||
UpdatedReplicas: 4,
|
||||
AvailableReplicas: 1,
|
||||
}}
|
||||
retryable, err = mocks.controller.isDeploymentReady(dp, 0, 50)
|
||||
assert.Error(t, err)
|
||||
assert.True(t, strings.Contains(err.Error(), "available"))
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue