Fix flaky upgrade test (#1368)

* Fix flaky upgrade test

Signed-off-by: Luis Rascao <luis.rascao@gmail.com>

* fixup! Fix flaky upgrade test

Signed-off-by: Luis Rascao <luis.rascao@gmail.com>

---------

Signed-off-by: Luis Rascao <luis.rascao@gmail.com>
Co-authored-by: Mukundan Sundararajan <65565396+mukundansundar@users.noreply.github.com>
This commit is contained in:
Luis Rascão 2024-01-23 09:55:30 +00:00 committed by GitHub
parent f5eb4fda6c
commit 50e1dffec0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 89 additions and 13 deletions

View File

@ -351,6 +351,8 @@ dapr upgrade -k --runtime-version=1.0.0
The example above shows how to upgrade from your current version to version `1.0.0`.
*Note: `dapr upgrade` will attempt the upgrade up to 5 times before reporting a failure.*
#### Supplying Helm values
All available [Helm Chart values](https://github.com/dapr/dapr/tree/master/charts/dapr#configuration) can be set by using the `--set` flag:

View File

@ -21,6 +21,7 @@ import (
helm "helm.sh/helm/v3/pkg/action"
"helm.sh/helm/v3/pkg/chart"
"helm.sh/helm/v3/pkg/release"
"k8s.io/helm/pkg/strvals"
"github.com/hashicorp/go-version"
@ -56,6 +57,16 @@ type UpgradeConfig struct {
ImageVariant string
}
type (
	// UpgradeOptions collects the tunable settings consumed by the upgrade
	// routine. The zero value means "no retry".
	UpgradeOptions struct {
		// WithRetry switches retrying on when true.
		WithRetry bool
		// MaxRetries is the retry limit applied when WithRetry is set.
		MaxRetries int
		// RetryInterval is the pause inserted between consecutive attempts.
		RetryInterval time.Duration
	}

	// UpgradeOption is a functional option: a callback that mutates an
	// UpgradeOptions value in place to configure the upgrade.
	UpgradeOption func(*UpgradeOptions)
)
func Upgrade(conf UpgradeConfig) error {
helmRepo := utils.GetEnv("DAPR_HELM_REPO_URL", daprHelmRepo)
status, err := GetDaprResourcesStatus()
@ -71,14 +82,14 @@ func Upgrade(conf UpgradeConfig) error {
return err
}
helmConf, err := helmConfig(status[0].Namespace)
upgradeClient, helmConf, err := newUpgradeClient(status[0].Namespace, conf)
if err != nil {
return err
return fmt.Errorf("unable to create helm client: %w", err)
}
controlPlaneChart, err := getHelmChart(conf.RuntimeVersion, "dapr", helmRepo, helmConf)
if err != nil {
return err
return fmt.Errorf("unable to get helm chart: %w", err)
}
willHaveDashboardInDaprChart, err := IsDashboardIncluded(conf.RuntimeVersion)
@ -116,13 +127,6 @@ func Upgrade(conf UpgradeConfig) error {
}
}
upgradeClient := helm.NewUpgrade(helmConf)
upgradeClient.ResetValues = true
upgradeClient.Namespace = status[0].Namespace
upgradeClient.CleanupOnFail = true
upgradeClient.Wait = true
upgradeClient.Timeout = time.Duration(conf.Timeout) * time.Second
print.InfoStatusEvent(os.Stdout, "Starting upgrade...")
mtls, err := IsMTLSEnabled()
@ -155,7 +159,7 @@ func Upgrade(conf UpgradeConfig) error {
if !isDowngrade(conf.RuntimeVersion, daprVersion) {
err = applyCRDs(fmt.Sprintf("v%s", conf.RuntimeVersion))
if err != nil {
return err
return fmt.Errorf("unable to apply CRDs: %w", err)
}
} else {
print.InfoStatusEvent(os.Stdout, "Downgrade detected, skipping CRDs.")
@ -166,8 +170,13 @@ func Upgrade(conf UpgradeConfig) error {
return err
}
if _, err = upgradeClient.Run(chart, controlPlaneChart, vals); err != nil {
return err
// Deal with a known race condition when applying both CRD and CR close together. The Helm upgrade fails
// when a CR is applied before its CRD is fully registered. On each retry we need a
// fresh client since the kube client locally caches the last OpenAPI schema it received from the server.
// See https://github.com/kubernetes/kubectl/issues/1179
_, err = helmUpgrade(upgradeClient, chart, controlPlaneChart, vals, WithRetry(5, 100*time.Millisecond))
if err != nil {
return fmt.Errorf("failure while running upgrade: %w", err)
}
if dashboardChart != nil {
@ -192,6 +201,55 @@ func Upgrade(conf UpgradeConfig) error {
return nil
}
// WithRetry returns an UpgradeOption that turns retrying on and records the
// given retry limit and the delay to wait between attempts.
func WithRetry(maxRetries int, retryInterval time.Duration) UpgradeOption {
	apply := func(opts *UpgradeOptions) {
		opts.WithRetry, opts.MaxRetries, opts.RetryInterval = true, maxRetries, retryInterval
	}
	return apply
}
// helmUpgrade runs a Helm upgrade of release `name` with the given chart and
// values, optionally retrying when the run fails.
//
// With no options a single attempt is made. When WithRetry is supplied, the
// upgrade is attempted at most MaxRetries times in total, sleeping
// RetryInterval between attempts. Before every new attempt a brand new Helm
// client is built (same namespace and timeout as the one passed in) so that a
// fresh OpenAPI schema is fetched from the server instead of reusing the
// locally cached one.
//
// It returns the release produced by the successful run, or a wrapped error
// from the last failed run (or from re-creating the client).
func helmUpgrade(client *helm.Upgrade, name string, chart *chart.Chart, vals map[string]interface{}, options ...UpgradeOption) (*release.Release, error) {
	// The zero value disables retrying; functional options override it.
	upgradeOptions := &UpgradeOptions{}
	for _, option := range options {
		option(upgradeOptions)
	}

	for attempt := 1; ; attempt++ {
		rel, err := client.Run(name, chart, vals)
		if err == nil {
			// Hand the actual release back to the caller instead of dropping it.
			return rel, nil
		}

		if !upgradeOptions.WithRetry {
			return nil, fmt.Errorf("unable to run command: %w", err)
		}
		if attempt >= upgradeOptions.MaxRetries {
			return nil, fmt.Errorf("max retries reached, unable to run command: %w", err)
		}

		print.PendingStatusEvent(os.Stdout, "Retrying after %s...", upgradeOptions.RetryInterval)
		time.Sleep(upgradeOptions.RetryInterval)

		// Create a totally new helm client, this ensures that we fetch a fresh
		// openapi schema from the server on each attempt.
		// UpgradeConfig.Timeout is expressed in seconds while client.Timeout is
		// a time.Duration, so convert back to whole seconds before passing it on.
		client, _, err = newUpgradeClient(client.Namespace, UpgradeConfig{
			Timeout: uint(client.Timeout / time.Second),
		})
		if err != nil {
			return nil, fmt.Errorf("unable to create helm client: %w", err)
		}
	}
}
func highAvailabilityEnabled(status []StatusOutput) bool {
for _, s := range status {
if s.Name == "dapr-dashboard" {
@ -264,3 +322,19 @@ func isDowngrade(targetVersion, existingVersion string) bool {
}
return target.LessThan(existing)
}
// newUpgradeClient builds a Helm configuration for the given namespace and a
// Helm upgrade action bound to it. The action resets values, cleans up on
// failure, waits for completion, and uses cfg.Timeout (in seconds) as its
// timeout. Both the action and the underlying configuration are returned; on
// a configuration error both are nil.
func newUpgradeClient(namespace string, cfg UpgradeConfig) (*helm.Upgrade, *helm.Configuration, error) {
	actionCfg, cfgErr := helmConfig(namespace)
	if cfgErr != nil {
		return nil, nil, cfgErr
	}

	timeout := time.Duration(cfg.Timeout) * time.Second

	upgrade := helm.NewUpgrade(actionCfg)
	upgrade.Namespace = namespace
	upgrade.Timeout = timeout
	upgrade.Wait = true
	upgrade.ResetValues = true
	upgrade.CleanupOnFail = true

	return upgrade, actionCfg, nil
}