Use fast-retry and cleanup task options for tests

The launch configuration test exposed that our integration tests don't
retry for very long, and wait a long time in between retries.

Create a RunTasksOptions type to hold the parameters, in particular
max task time, and the amount of time we wait when all tasks have
failed.
This commit is contained in:
Justin Santa Barbara 2018-06-17 23:08:32 -04:00
parent a9eb6fed2e
commit 1af610a6c4
18 changed files with 91 additions and 50 deletions

View File

@ -226,7 +226,7 @@ func runTest(t *testing.T, h *testutils.IntegrationTestHarness, clusterName stri
options.InitDefaults()
options.Target = "terraform"
options.OutDir = path.Join(h.TempDir, "out")
options.MaxTaskDuration = 30 * time.Second
options.RunTasksOptions.MaxTaskDuration = 30 * time.Second
if phase != nil {
options.Phase = string(*phase)
}
@ -514,7 +514,7 @@ func runTestCloudformation(t *testing.T, clusterName string, srcDir string, vers
options.InitDefaults()
options.Target = "cloudformation"
options.OutDir = path.Join(h.TempDir, "out")
options.MaxTaskDuration = 30 * time.Second
options.RunTasksOptions.MaxTaskDuration = 30 * time.Second
// We don't test it here, and it adds a dependency on kubectl
options.CreateKubecfg = false

View File

@ -144,7 +144,7 @@ func runLifecycleTest(h *testutils.IntegrationTestHarness, o *LifecycleTestOptio
{
options := &UpdateClusterOptions{}
options.InitDefaults()
options.MaxTaskDuration = 10 * time.Second
options.RunTasksOptions.MaxTaskDuration = 10 * time.Second
options.Yes = true
// We don't test it here, and it adds a dependency on kubectl
@ -160,7 +160,7 @@ func runLifecycleTest(h *testutils.IntegrationTestHarness, o *LifecycleTestOptio
options := &UpdateClusterOptions{}
options.InitDefaults()
options.Target = cloudup.TargetDryRun
options.MaxTaskDuration = 10 * time.Second
options.RunTasksOptions.MaxTaskDuration = 10 * time.Second
// We don't test it here, and it adds a dependency on kubectl
options.CreateKubecfg = false

View File

@ -23,7 +23,6 @@ import (
"io/ioutil"
"path/filepath"
"strings"
"time"
"github.com/golang/glog"
"github.com/spf13/cobra"
@ -63,7 +62,7 @@ type UpdateClusterOptions struct {
Models string
OutDir string
SSHPublicKey string
MaxTaskDuration time.Duration
RunTasksOptions fi.RunTasksOptions
CreateKubecfg bool
Phase string
@ -79,8 +78,8 @@ func (o *UpdateClusterOptions) InitDefaults() {
o.Models = strings.Join(cloudup.CloudupModels, ",")
o.SSHPublicKey = ""
o.OutDir = ""
o.MaxTaskDuration = cloudup.DefaultMaxTaskDuration
o.CreateKubecfg = true
o.RunTasksOptions.InitDefaults()
}
func NewCmdUpdateCluster(f *util.Factory, out io.Writer) *cobra.Command {
@ -246,7 +245,7 @@ func RunUpdateCluster(f *util.Factory, clusterName string, out io.Writer, c *Upd
Cluster: cluster,
DryRun: isDryrun,
InstanceGroups: instanceGroups,
MaxTaskDuration: c.MaxTaskDuration,
RunTasksOptions: &c.RunTasksOptions,
Models: strings.Split(c.Models, ","),
OutDir: c.OutDir,
Phase: phase,

View File

@ -104,11 +104,12 @@ func main() {
command = append(command, s)
}
i := bootstrap.Installation{
MaxTaskDuration: 5 * time.Minute,
CacheDir: flagCacheDir,
Command: command,
FSRoot: flagRootFS,
CacheDir: flagCacheDir,
Command: command,
FSRoot: flagRootFS,
}
i.RunTasksOptions.InitDefaults()
i.RunTasksOptions.MaxTaskDuration = 5 * time.Minute
err = i.Run()
if err == nil {
fmt.Printf("service installed")

View File

@ -0,0 +1,18 @@
## Release notes for kops 1.10 series
# Significant changes
* Old LaunchConfigurations are now deleted on AWS. By default the 3 most recent LaunchConfigurations for each InstanceGroup are kept, and older ones are automatically removed. To keep the existing behaviour set the KeepLaunchConfigurations feature flag i.e. `export KOPS_FEATURE_FLAGS=KeepLaunchConfigurations`
# Required Actions
None known at this time
# Highlighted changes
(to follow)
# Full change list
(to follow)

View File

@ -21,7 +21,6 @@ import (
"fmt"
"os"
"strings"
"time"
"github.com/golang/glog"
"k8s.io/apimachinery/pkg/util/sets"
@ -36,7 +35,7 @@ import (
type Installation struct {
FSRoot string
CacheDir string
MaxTaskDuration time.Duration
RunTasksOptions fi.RunTasksOptions
Command []string
}
@ -86,7 +85,7 @@ func (i *Installation) Run() error {
}
defer context.Close()
err = context.RunTasks(i.MaxTaskDuration)
err = context.RunTasks(i.RunTasksOptions)
if err != nil {
return fmt.Errorf("error running tasks: %v", err)
}

View File

@ -62,7 +62,8 @@ var _ fi.Target = &MockTarget{}
func TestKeypairUpgrade(t *testing.T) {
lifecycle := fi.LifecycleSync
defaultDeadline := 2 * time.Second
runTasksOptions := fi.RunTasksOptions{}
runTasksOptions.MaxTaskDuration = 2 * time.Second
target := &MockTarget{}
@ -119,7 +120,7 @@ func TestKeypairUpgrade(t *testing.T) {
t.Fatalf("error building context: %v", err)
}
if err := context.RunTasks(defaultDeadline); err != nil {
if err := context.RunTasks(runTasksOptions); err != nil {
t.Fatalf("unexpected error during Run: %v", err)
}
}
@ -160,7 +161,7 @@ func TestKeypairUpgrade(t *testing.T) {
t.Fatalf("error building context: %v", err)
}
if err := context.RunTasks(defaultDeadline); err != nil {
if err := context.RunTasks(runTasksOptions); err != nil {
t.Fatalf("unexpected error during Run: %v", err)
}
}
@ -180,7 +181,7 @@ func TestKeypairUpgrade(t *testing.T) {
t.Fatalf("error building context: %v", err)
}
if err := context.RunTasks(defaultDeadline); err != nil {
if err := context.RunTasks(runTasksOptions); err != nil {
t.Fatalf("unexpected error during Run: %v", err)
}
}

View File

@ -22,7 +22,6 @@ import (
"os"
"path"
"strings"
"time"
"github.com/blang/semver"
"github.com/golang/glog"
@ -71,8 +70,7 @@ import (
)
const (
DefaultMaxTaskDuration = 10 * time.Minute
starline = "*********************************************************************************\n"
starline = "*********************************************************************************\n"
)
var (
@ -124,7 +122,8 @@ type ApplyClusterCmd struct {
// DryRun is true if this is only a dry run
DryRun bool
MaxTaskDuration time.Duration
// RunTasksOptions defines parameters for task execution, e.g. retry interval
RunTasksOptions *fi.RunTasksOptions
// The channel we are using
channel *kops.Channel
@ -142,10 +141,6 @@ type ApplyClusterCmd struct {
}
func (c *ApplyClusterCmd) Run() error {
if c.MaxTaskDuration == 0 {
c.MaxTaskDuration = DefaultMaxTaskDuration
}
if c.InstanceGroups == nil {
list, err := c.Clientset.InstanceGroupsFor(c.Cluster).List(metav1.ListOptions{})
if err != nil {
@ -808,7 +803,14 @@ func (c *ApplyClusterCmd) Run() error {
}
defer context.Close()
err = context.RunTasks(c.MaxTaskDuration)
var options fi.RunTasksOptions
if c.RunTasksOptions != nil {
options = *c.RunTasksOptions
} else {
options.InitDefaults()
}
err = context.RunTasks(options)
if err != nil {
return fmt.Errorf("error running tasks: %v", err)
}

View File

@ -32,7 +32,10 @@ import (
"k8s.io/kops/upup/pkg/fi/cloudup/awsup"
)
const defaultDeadline = 2 * time.Second
var testRunTasksOptions = fi.RunTasksOptions{
MaxTaskDuration: 2 * time.Second,
WaitAfterAllTasksFailed: 500 * time.Millisecond,
}
func TestElasticIPCreate(t *testing.T) {
cloud := awsup.BuildMockAWSCloud("us-east-1", "abc")
@ -77,7 +80,7 @@ func TestElasticIPCreate(t *testing.T) {
t.Fatalf("error building context: %v", err)
}
if err := context.RunTasks(defaultDeadline); err != nil {
if err := context.RunTasks(testRunTasksOptions); err != nil {
t.Fatalf("unexpected error during Run: %v", err)
}
@ -119,7 +122,7 @@ func checkNoChanges(t *testing.T, cloud fi.Cloud, allTasks map[string]fi.Task) {
t.Fatalf("error building context: %v", err)
}
if err := context.RunTasks(defaultDeadline); err != nil {
if err := context.RunTasks(testRunTasksOptions); err != nil {
t.Fatalf("unexpected error during Run: %v", err)
}

View File

@ -111,7 +111,7 @@ func TestSharedInternetGatewayDoesNotRename(t *testing.T) {
t.Fatalf("error building context: %v", err)
}
if err := context.RunTasks(defaultDeadline); err != nil {
if err := context.RunTasks(testRunTasksOptions); err != nil {
t.Fatalf("unexpected error during Run: %v", err)
}

View File

@ -39,8 +39,6 @@ import (
// keep, we delete older ones
var defaultRetainLaunchConfigurationCount = 3
// TODO: Release note , including featureflag
// RetainLaunchConfigurationCount returns the number of launch configurations to keep
func RetainLaunchConfigurationCount() int {
if featureflag.KeepLaunchConfigurations.Enabled() {

View File

@ -19,7 +19,6 @@ package awstasks
import (
"strconv"
"testing"
"time"
"github.com/aws/aws-sdk-go/aws"
@ -77,10 +76,16 @@ func TestLaunchConfigurationGarbageCollection(t *testing.T) {
t.Fatalf("error building context: %v", err)
}
time.Sleep(time.Second)
// TODO: Remove sleep, find out why we don't retry
if err := context.RunTasks(defaultDeadline); err != nil {
// We use a longer deadline because we know we often need to
// retry here, because we create different versions of
// launchconfigurations using the timestamp, but only to
// per-second granularity. This normally works out because we
// retry for O(minutes), so after a few retries the clock has
// advanced. But if we use too short a deadline in our tests we
// don't get this behaviour.
options := testRunTasksOptions
options.MaxTaskDuration = 5 * time.Second
if err := context.RunTasks(options); err != nil {
t.Fatalf("unexpected error during Run: %v", err)
}

View File

@ -132,7 +132,7 @@ func TestSecurityGroupCreate(t *testing.T) {
t.Fatalf("error building context: %v", err)
}
if err := context.RunTasks(defaultDeadline); err != nil {
if err := context.RunTasks(testRunTasksOptions); err != nil {
t.Fatalf("unexpected error during Run: %v", err)
}

View File

@ -101,7 +101,7 @@ func TestSubnetCreate(t *testing.T) {
t.Fatalf("error building context: %v", err)
}
if err := context.RunTasks(defaultDeadline); err != nil {
if err := context.RunTasks(testRunTasksOptions); err != nil {
t.Fatalf("unexpected error during Run: %v", err)
}
@ -219,7 +219,7 @@ func TestSharedSubnetCreateDoesNotCreateNew(t *testing.T) {
t.Fatalf("error building context: %v", err)
}
if err := context.RunTasks(defaultDeadline); err != nil {
if err := context.RunTasks(testRunTasksOptions); err != nil {
t.Fatalf("unexpected error during Run: %v", err)
}

View File

@ -57,7 +57,7 @@ func TestVPCCreate(t *testing.T) {
t.Fatalf("error building context: %v", err)
}
if err := context.RunTasks(defaultDeadline); err != nil {
if err := context.RunTasks(testRunTasksOptions); err != nil {
t.Fatalf("unexpected error during Run: %v", err)
}

View File

@ -23,7 +23,6 @@ import (
"os"
"reflect"
"strings"
"time"
"github.com/golang/glog"
"k8s.io/kops/dnsprovider/pkg/dnsprovider"
@ -80,11 +79,12 @@ func (c *Context) AllTasks() map[string]Task {
return c.tasks
}
func (c *Context) RunTasks(maxTaskDuration time.Duration) error {
func (c *Context) RunTasks(options RunTasksOptions) error {
e := &executor{
context: c,
options: options,
}
return e.RunTasks(c.tasks, maxTaskDuration)
return e.RunTasks(c.tasks)
}
func (c *Context) Close() {

View File

@ -27,6 +27,8 @@ import (
type executor struct {
context *Context
options RunTasksOptions
}
type taskState struct {
@ -38,9 +40,19 @@ type taskState struct {
dependencies []*taskState
}
type RunTasksOptions struct {
MaxTaskDuration time.Duration
WaitAfterAllTasksFailed time.Duration
}
func (o *RunTasksOptions) InitDefaults() {
o.MaxTaskDuration = 10 * time.Minute
o.WaitAfterAllTasksFailed = 10 * time.Second
}
// RunTasks executes all the tasks, considering their dependencies
// It will perform some re-execution on error, retrying as long as progress is still being made
func (e *executor) RunTasks(taskMap map[string]Task, maxTaskDuration time.Duration) error {
func (e *executor) RunTasks(taskMap map[string]Task) error {
dependencies := FindTaskDependencies(taskMap)
taskStates := make(map[string]*taskState)
@ -80,7 +92,7 @@ func (e *executor) RunTasks(taskMap map[string]Task, maxTaskDuration time.Durati
}
if ready {
if ts.deadline.IsZero() {
ts.deadline = time.Now().Add(maxTaskDuration)
ts.deadline = time.Now().Add(e.options.MaxTaskDuration)
} else if time.Now().After(ts.deadline) {
return fmt.Errorf("deadline exceeded executing task %v. Example error: %v", ts.key, ts.lastError)
}
@ -131,7 +143,7 @@ func (e *executor) RunTasks(taskMap map[string]Task, maxTaskDuration time.Durati
panic("did not make progress executing tasks; but no errors reported")
}
glog.Infof("No progress made, sleeping before retrying %d failed task(s)", len(errors))
time.Sleep(10 * time.Second)
time.Sleep(e.options.WaitAfterAllTasksFailed)
}
}

View File

@ -291,7 +291,10 @@ func (c *NodeUpCommand) Run(out io.Writer) error {
}
defer context.Close()
err = context.RunTasks(MaxTaskDuration)
var options fi.RunTasksOptions
options.InitDefaults()
err = context.RunTasks(options)
if err != nil {
glog.Exitf("error running tasks: %v", err)
}