[feat] primaryReadyThreshold: allow configuring the primary readiness threshold

see #639

Signed-off-by: Mahdi Dibaiee <mdibaiee@pm.me>
Mahdi Dibaiee 2021-11-10 14:04:02 +00:00
parent 9c7db58d87
commit 8f50521435
11 changed files with 141 additions and 24 deletions

View File

@ -826,6 +826,9 @@ spec:
mirrorWeight:
description: Weight of traffic to be mirrored
type: number
primaryReadyThreshold:
description: Percentage of pods that need to be available to consider primary as ready
type: number
match:
description: A/B testing match conditions
type: array

View File

@ -826,6 +826,9 @@ spec:
mirrorWeight:
description: Weight of traffic to be mirrored
type: number
primaryReadyThreshold:
description: Percentage of pods that need to be available to consider primary as ready
type: number
match:
description: A/B testing match conditions
type: array

View File

@ -335,6 +335,10 @@ Spec:
# total number of iterations
# used for A/B Testing and Blue/Green
iterations:
# percentage of primary pods (0-100) that must be available
# for the primary to be considered ready before starting the rollout
# optional, defaults to 100
primaryReadyThreshold: 100
# canary match conditions
# used for A/B Testing
match:
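
For illustration only (not part of this commit): the percentage is applied with the same float32 multiplication and int32 conversion used in the controller changes further down, so fractional results are truncated rather than rounded. A minimal, self-contained Go sketch, where requiredPods is a hypothetical helper name:

package main

import "fmt"

// requiredPods converts a percentage threshold into the number of pods that
// must be available, mirroring the controller math below.
func requiredPods(desired int32, readyThreshold int) int32 {
	ratio := float32(readyThreshold) / float32(100)
	return int32(float32(desired) * ratio)
}

func main() {
	fmt.Println(requiredPods(4, 100)) // 4: the default, every desired pod must be available
	fmt.Println(requiredPods(4, 50))  // 2: half of the desired pods are enough
	fmt.Println(requiredPods(5, 50))  // 2: the int32 conversion truncates, it does not round
}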

View File

@ -826,6 +826,9 @@ spec:
mirrorWeight:
description: Weight of traffic to be mirrored
type: number
primaryReadyThreshold:
description: Percentage of pods that need to be available to consider primary as ready
type: number
match:
description: A/B testing match conditions
type: array

View File

@ -29,6 +29,7 @@ const (
CanaryKind = "Canary"
ProgressDeadlineSeconds = 600
AnalysisInterval = 60 * time.Second
PrimaryReadyThreshold = 100
MetricInterval = "1m"
)
@ -229,6 +230,9 @@ type CanaryAnalysis struct {
// Max number of failed checks before the canary is terminated
Threshold int `json:"threshold"`
// Percentage of pods that need to be available to consider primary as ready
PrimaryReadyThreshold *int `json:"primaryReadyThreshold,omitempty"`
// Alert list for this canary analysis
Alerts []CanaryAlert `json:"alerts,omitempty"`
@ -440,6 +444,14 @@ func (c *Canary) GetAnalysisThreshold() int {
return 1
}
// GetAnalysisPrimaryReadyThreshold returns the canary primaryReadyThreshold (default 100)
func (c *Canary) GetAnalysisPrimaryReadyThreshold() int {
if c.GetAnalysis().PrimaryReadyThreshold != nil {
return *c.GetAnalysis().PrimaryReadyThreshold
}
return PrimaryReadyThreshold
}
// GetMetricInterval returns the metric interval default value (1m)
func (c *Canary) GetMetricInterval() string {
return MetricInterval
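
A minimal usage sketch of the defaulting behaviour (the analysis struct below is a simplified stand-in, not the real flaggerv1.CanaryAnalysis): an omitted primaryReadyThreshold deserialises to a nil pointer and falls back to 100, any configured value is returned as-is.

package main

import "fmt"

const defaultPrimaryReadyThreshold = 100

// analysis stands in for CanaryAnalysis: the field is a pointer so that
// "omitted" and "set to a value" can be told apart.
type analysis struct {
	PrimaryReadyThreshold *int
}

// primaryReadyThreshold mirrors GetAnalysisPrimaryReadyThreshold:
// nil means the 100% default, otherwise the configured percentage.
func (a *analysis) primaryReadyThreshold() int {
	if a.PrimaryReadyThreshold != nil {
		return *a.PrimaryReadyThreshold
	}
	return defaultPrimaryReadyThreshold
}

func main() {
	fifty := 50
	fmt.Println((&analysis{}).primaryReadyThreshold())                              // 100
	fmt.Println((&analysis{PrimaryReadyThreshold: &fifty}).primaryReadyThreshold()) // 50
}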

View File

@ -202,6 +202,11 @@ func (in *CanaryAnalysis) DeepCopyInto(out *CanaryAnalysis) {
*out = make([]int, len(*in))
copy(*out, *in)
}
if in.PrimaryReadyThreshold != nil {
in, out := &in.PrimaryReadyThreshold, &out.PrimaryReadyThreshold
*out = new(int)
**out = **in
}
if in.Alerts != nil {
in, out := &in.Alerts, &out.Alerts
*out = make([]CanaryAlert, len(*in))

View File

@ -357,6 +357,7 @@ func newDaemonSetControllerTestCanary(dc daemonsetConfigs) *flaggerv1.Canary {
APIVersion: "apps/v1",
Kind: "DaemonSet",
},
Analysis: &flaggerv1.CanaryAnalysis{},
},
}
return cd

View File

@ -36,7 +36,7 @@ func (c *DaemonSetController) IsPrimaryReady(cd *flaggerv1.Canary) error {
return fmt.Errorf("daemonset %s.%s get query error: %w", primaryName, cd.Namespace, err)
}
_, err = c.isDaemonSetReady(cd, primary)
_, err = c.isDaemonSetReady(cd, primary, cd.GetAnalysisPrimaryReadyThreshold())
if err != nil {
return fmt.Errorf("primary daemonset %s.%s not ready: %w", primaryName, cd.Namespace, err)
}
@ -52,7 +52,7 @@ func (c *DaemonSetController) IsCanaryReady(cd *flaggerv1.Canary) (bool, error)
return true, fmt.Errorf("daemonset %s.%s get query error: %w", targetName, cd.Namespace, err)
}
retryable, err := c.isDaemonSetReady(cd, canary)
retryable, err := c.isDaemonSetReady(cd, canary, 100)
if err != nil {
return retryable, fmt.Errorf("canary daemonset %s.%s not ready with retryable %v: %w",
targetName, cd.Namespace, retryable, err)
@ -62,11 +62,14 @@ func (c *DaemonSetController) IsCanaryReady(cd *flaggerv1.Canary) (bool, error)
// isDaemonSetReady determines if a daemonset is ready by checking the number of old version daemons
// reference: https://github.com/kubernetes/kubernetes/blob/5232ad4a00ec93942d0b2c6359ee6cd1201b46bc/pkg/kubectl/rollout_status.go#L110
func (c *DaemonSetController) isDaemonSetReady(cd *flaggerv1.Canary, daemonSet *appsv1.DaemonSet) (bool, error) {
func (c *DaemonSetController) isDaemonSetReady(cd *flaggerv1.Canary, daemonSet *appsv1.DaemonSet, readyThreshold int) (bool, error) {
if daemonSet.Generation <= daemonSet.Status.ObservedGeneration {
readyThresholdRatio := float32(readyThreshold) / float32(100)
// calculate conditions
newCond := daemonSet.Status.UpdatedNumberScheduled < daemonSet.Status.DesiredNumberScheduled
availableCond := daemonSet.Status.NumberAvailable < daemonSet.Status.DesiredNumberScheduled
readyThresholdDesiredReplicas := int32(float32(daemonSet.Status.DesiredNumberScheduled) * readyThresholdRatio)
availableCond := daemonSet.Status.NumberAvailable < readyThresholdDesiredReplicas
if !newCond && !availableCond {
return true, nil
}
@ -83,8 +86,8 @@ func (c *DaemonSetController) isDaemonSetReady(cd *flaggerv1.Canary, daemonSet *
return true, fmt.Errorf("waiting for rollout to finish: %d out of %d new pods have been updated",
daemonSet.Status.UpdatedNumberScheduled, daemonSet.Status.DesiredNumberScheduled)
} else if availableCond {
return true, fmt.Errorf("waiting for rollout to finish: %d of %d updated pods are available",
daemonSet.Status.NumberAvailable, daemonSet.Status.DesiredNumberScheduled)
return true, fmt.Errorf("waiting for rollout to finish: %d of %d (readyThreshold %d%%) updated pods are available",
daemonSet.Status.NumberAvailable, readyThresholdDesiredReplicas, readyThreshold)
}
}
return true, fmt.Errorf("waiting for rollout to finish: observed daemonset generation less than desired generation")
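
To make the change in availableCond concrete, here is a self-contained sketch (illustration only, not the controller code) using the 50% case from the new test further down: 4 desired daemons, 2 available.

package main

import "fmt"

func main() {
	var desiredNumberScheduled, numberAvailable int32 = 4, 2
	readyThreshold := 50

	// Old check: every desired pod had to be available.
	oldAvailableCond := numberAvailable < desiredNumberScheduled

	// New check: only the threshold share of the desired pods must be available.
	readyThresholdRatio := float32(readyThreshold) / float32(100)
	readyThresholdDesiredReplicas := int32(float32(desiredNumberScheduled) * readyThresholdRatio)
	newAvailableCond := numberAvailable < readyThresholdDesiredReplicas

	fmt.Println(oldAvailableCond) // true:  the primary would previously be reported as not ready
	fmt.Println(newAvailableCond) // false: with a 50% threshold it now counts as ready
}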

View File

@ -48,7 +48,7 @@ func TestDaemonSetController_isDaemonSetReady(t *testing.T) {
// observed generation is less than desired generation
ds := &appsv1.DaemonSet{Status: appsv1.DaemonSetStatus{}}
ds.Status.ObservedGeneration--
retryable, err := mocks.controller.isDaemonSetReady(cd, ds)
retryable, err := mocks.controller.isDaemonSetReady(cd, ds, 100)
require.Error(t, err)
require.True(t, retryable)
@ -58,7 +58,7 @@ func TestDaemonSetController_isDaemonSetReady(t *testing.T) {
DesiredNumberScheduled: 1,
NumberAvailable: 1,
}}
retryable, err = mocks.controller.isDaemonSetReady(cd, ds)
retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 100)
require.NoError(t, err)
require.True(t, retryable)
@ -69,7 +69,7 @@ func TestDaemonSetController_isDaemonSetReady(t *testing.T) {
}}
cd.Status.LastTransitionTime = metav1.Now()
cd.Spec.ProgressDeadlineSeconds = int32p(-1e6)
retryable, err = mocks.controller.isDaemonSetReady(cd, ds)
retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 100)
require.Error(t, err)
require.False(t, retryable)
@ -80,7 +80,7 @@ func TestDaemonSetController_isDaemonSetReady(t *testing.T) {
NumberAvailable: 1,
}}
cd.Spec.ProgressDeadlineSeconds = int32p(1e6)
retryable, err = mocks.controller.isDaemonSetReady(cd, ds)
retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 100)
require.Error(t, err)
require.True(t, retryable)
require.True(t, strings.Contains(err.Error(), "new pods"))
@ -91,7 +91,46 @@ func TestDaemonSetController_isDaemonSetReady(t *testing.T) {
DesiredNumberScheduled: 1,
NumberAvailable: 0,
}}
retryable, err = mocks.controller.isDaemonSetReady(cd, ds)
retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 100)
require.Error(t, err)
require.True(t, retryable)
require.True(t, strings.Contains(err.Error(), "available"))
}
func TestDaemonSetController_isDaemonSetReady_readyThreshold(t *testing.T) {
dc := daemonsetConfigs{name: "podinfo", label: "name", labelValue: "podinfo"}
mocks := newDaemonSetFixture(dc)
cd := &flaggerv1.Canary{}
// succeeded
ds := &appsv1.DaemonSet{Status: appsv1.DaemonSetStatus{
UpdatedNumberScheduled: 1,
DesiredNumberScheduled: 1,
NumberAvailable: 1,
}}
retryable, err := mocks.controller.isDaemonSetReady(cd, ds, 50)
require.NoError(t, err)
require.True(t, retryable)
// waiting for updated ones to be available, 50% available, ok
ds = &appsv1.DaemonSet{Status: appsv1.DaemonSetStatus{
UpdatedNumberScheduled: 4,
DesiredNumberScheduled: 4,
NumberAvailable: 2,
}}
retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 50)
require.NoError(t, err)
require.True(t, retryable)
// waiting for updated ones to be available, less than 50% available
ds = &appsv1.DaemonSet{Status: appsv1.DaemonSetStatus{
UpdatedNumberScheduled: 4,
DesiredNumberScheduled: 4,
NumberAvailable: 1,
}}
cd.Status.LastTransitionTime = metav1.Now()
cd.Spec.ProgressDeadlineSeconds = int32p(1e6)
retryable, err = mocks.controller.isDaemonSetReady(cd, ds, 50)
require.Error(t, err)
require.True(t, retryable)
require.True(t, strings.Contains(err.Error(), "available"))

View File

@ -37,7 +37,7 @@ func (c *DeploymentController) IsPrimaryReady(cd *flaggerv1.Canary) error {
return fmt.Errorf("deployment %s.%s get query error: %w", primaryName, cd.Namespace, err)
}
_, err = c.isDeploymentReady(primary, cd.GetProgressDeadlineSeconds())
_, err = c.isDeploymentReady(primary, cd.GetProgressDeadlineSeconds(), cd.GetAnalysisPrimaryReadyThreshold())
if err != nil {
return fmt.Errorf("%s.%s not ready: %w", primaryName, cd.Namespace, err)
}
@ -59,7 +59,7 @@ func (c *DeploymentController) IsCanaryReady(cd *flaggerv1.Canary) (bool, error)
return true, fmt.Errorf("deployment %s.%s get query error: %w", targetName, cd.Namespace, err)
}
retryable, err := c.isDeploymentReady(canary, cd.GetProgressDeadlineSeconds())
retryable, err := c.isDeploymentReady(canary, cd.GetProgressDeadlineSeconds(), 100)
if err != nil {
return retryable, fmt.Errorf(
"canary deployment %s.%s not ready: %w",
@ -71,7 +71,7 @@ func (c *DeploymentController) IsCanaryReady(cd *flaggerv1.Canary) (bool, error)
// isDeploymentReady determines if a deployment is ready by checking the status conditions
// if a deployment has exceeded the progress deadline it returns a non retriable error
func (c *DeploymentController) isDeploymentReady(deployment *appsv1.Deployment, deadline int) (bool, error) {
func (c *DeploymentController) isDeploymentReady(deployment *appsv1.Deployment, deadline int, readyThreshold int) (bool, error) {
retriable := true
if deployment.Generation <= deployment.Status.ObservedGeneration {
progress := c.getDeploymentCondition(deployment.Status, appsv1.DeploymentProgressing)
@ -86,6 +86,9 @@ func (c *DeploymentController) isDeploymentReady(deployment *appsv1.Deployment,
}
}
readyThresholdRatio := float32(readyThreshold) / float32(100)
readyThresholdUpdatedReplicas := int32(float32(deployment.Status.UpdatedReplicas) * readyThresholdRatio)
if progress != nil && progress.Reason == "ProgressDeadlineExceeded" {
return false, fmt.Errorf("deployment %q exceeded its progress deadline", deployment.GetName())
} else if deployment.Spec.Replicas != nil && deployment.Status.UpdatedReplicas < *deployment.Spec.Replicas {
@ -94,9 +97,9 @@ func (c *DeploymentController) isDeploymentReady(deployment *appsv1.Deployment,
} else if deployment.Status.Replicas > deployment.Status.UpdatedReplicas {
return retriable, fmt.Errorf("waiting for rollout to finish: %d old replicas are pending termination",
deployment.Status.Replicas-deployment.Status.UpdatedReplicas)
} else if deployment.Status.AvailableReplicas < deployment.Status.UpdatedReplicas {
return retriable, fmt.Errorf("waiting for rollout to finish: %d of %d updated replicas are available",
deployment.Status.AvailableReplicas, deployment.Status.UpdatedReplicas)
} else if deployment.Status.AvailableReplicas < readyThresholdUpdatedReplicas {
return retriable, fmt.Errorf("waiting for rollout to finish: %d of %d (readyThreshold %d%%) updated replicas are available",
deployment.Status.AvailableReplicas, readyThresholdUpdatedReplicas, readyThreshold)
}
} else {
return true, fmt.Errorf(
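
A similar sketch for the deployment path (illustration only, the numbers are made up): here the threshold is applied to Status.UpdatedReplicas rather than a desired count, and only IsPrimaryReady passes the configured value, while IsCanaryReady keeps passing 100, so canary readiness is unchanged.

package main

import "fmt"

func main() {
	var updatedReplicas, availableReplicas int32 = 5, 3
	readyThreshold := 60 // as returned by GetAnalysisPrimaryReadyThreshold for the primary

	readyThresholdRatio := float32(readyThreshold) / float32(100)
	readyThresholdUpdatedReplicas := int32(float32(updatedReplicas) * readyThresholdRatio) // 3

	fmt.Printf("need %d of %d updated replicas, %d available, ready: %v\n",
		readyThresholdUpdatedReplicas, updatedReplicas, availableReplicas,
		availableReplicas >= readyThresholdUpdatedReplicas)
}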

View File

@ -45,7 +45,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
// observed generation is less than desired generation
dp := &appsv1.Deployment{Status: appsv1.DeploymentStatus{ObservedGeneration: -1}}
retryable, err := mocks.controller.isDeploymentReady(dp, 0)
retryable, err := mocks.controller.isDeploymentReady(dp, 0, 100)
assert.Error(t, err)
assert.True(t, retryable)
assert.True(t, strings.Contains(err.Error(), "generation"))
@ -57,7 +57,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
ReadyReplicas: 1,
AvailableReplicas: 1,
}}
retryable, err = mocks.controller.isDeploymentReady(dp, 0)
retryable, err = mocks.controller.isDeploymentReady(dp, 0, 100)
assert.NoError(t, err)
assert.True(t, retryable)
@ -67,7 +67,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
}, Spec: appsv1.DeploymentSpec{
Replicas: int32p(2),
}}
_, err = mocks.controller.isDeploymentReady(dp, 0)
_, err = mocks.controller.isDeploymentReady(dp, 0, 100)
assert.Error(t, err)
assert.True(t, strings.Contains(err.Error(), "new replicas"))
@ -76,7 +76,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
UpdatedReplicas: 1,
Replicas: 2,
}}
_, err = mocks.controller.isDeploymentReady(dp, 0)
_, err = mocks.controller.isDeploymentReady(dp, 0, 100)
assert.Error(t, err)
assert.True(t, strings.Contains(err.Error(), "termination"))
@ -85,7 +85,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
UpdatedReplicas: 2,
AvailableReplicas: 1,
}}
_, err = mocks.controller.isDeploymentReady(dp, 0)
_, err = mocks.controller.isDeploymentReady(dp, 0, 100)
assert.Error(t, err)
assert.True(t, strings.Contains(err.Error(), "available"))
@ -94,7 +94,7 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
Conditions: []appsv1.DeploymentCondition{
{Type: appsv1.DeploymentProgressing, Reason: "ProgressDeadlineExceeded"}},
}}
retryable, err = mocks.controller.isDeploymentReady(dp, 0)
retryable, err = mocks.controller.isDeploymentReady(dp, 0, 100)
assert.Error(t, err)
assert.False(t, retryable)
assert.True(t, strings.Contains(err.Error(), "exceeded"))
@ -113,7 +113,48 @@ func TestDeploymentController_isDeploymentReady(t *testing.T) {
UpdatedReplicas: 2,
AvailableReplicas: 1,
}}
retryable, err = mocks.controller.isDeploymentReady(dp, 5)
retryable, err = mocks.controller.isDeploymentReady(dp, 5, 100)
assert.Error(t, err)
assert.False(t, retryable)
}
func TestDeploymentController_isDeploymentReady_readyThreshold(t *testing.T) {
dc := deploymentConfigs{name: "podinfo", label: "name", labelValue: "podinfo"}
mocks := newDeploymentFixture(dc)
// observed generation is less than desired generation
dp := &appsv1.Deployment{Status: appsv1.DeploymentStatus{ObservedGeneration: -1}}
retryable, err := mocks.controller.isDeploymentReady(dp, 0, 50)
assert.Error(t, err)
assert.True(t, retryable)
assert.True(t, strings.Contains(err.Error(), "generation"))
// ok
dp = &appsv1.Deployment{Status: appsv1.DeploymentStatus{
Replicas: 1,
UpdatedReplicas: 1,
ReadyReplicas: 1,
AvailableReplicas: 1,
}}
retryable, err = mocks.controller.isDeploymentReady(dp, 0, 50)
assert.NoError(t, err)
assert.True(t, retryable)
// waiting for updated ones to be available, 50% is available, ok
dp = &appsv1.Deployment{Status: appsv1.DeploymentStatus{
UpdatedReplicas: 4,
AvailableReplicas: 2,
}}
retryable, err = mocks.controller.isDeploymentReady(dp, 0, 50)
assert.NoError(t, err)
assert.True(t, retryable)
// waiting for updated ones to be available, less than 50% available
dp = &appsv1.Deployment{Status: appsv1.DeploymentStatus{
UpdatedReplicas: 4,
AvailableReplicas: 1,
}}
retryable, err = mocks.controller.isDeploymentReady(dp, 0, 50)
assert.Error(t, err)
assert.True(t, strings.Contains(err.Error(), "available"))
}