Use fast-retry and cleanup task options for tests

The launch configuration test exposed that our integration tests don't retry for very long, and that they wait a long time between retries. Create a RunTasksOptions type to hold the relevant parameters: the maximum task duration (MaxTaskDuration) and the amount of time we wait after all tasks have failed (WaitAfterAllTasksFailed).
2018-06-17 23:08:32 -04:00 · 2018-06-17 23:08:32 -04:00 · 1af610a6c4
parent a9eb6fed2e
commit 1af610a6c4
18 changed files with 91 additions and 50 deletions
--- a/cmd/kops/integration_test.go
+++ b/cmd/kops/integration_test.go
@ -226,7 +226,7 @@ func runTest(t *testing.T, h *testutils.IntegrationTestHarness, clusterName stri
 		options.InitDefaults()
 		options.Target = "terraform"
 		options.OutDir = path.Join(h.TempDir, "out")
-		options.MaxTaskDuration = 30 * time.Second
+		options.RunTasksOptions.MaxTaskDuration = 30 * time.Second
 		if phase != nil {
 			options.Phase = string(*phase)
 		}
@ -514,7 +514,7 @@ func runTestCloudformation(t *testing.T, clusterName string, srcDir string, vers
 		options.InitDefaults()
 		options.Target = "cloudformation"
 		options.OutDir = path.Join(h.TempDir, "out")
-		options.MaxTaskDuration = 30 * time.Second
+		options.RunTasksOptions.MaxTaskDuration = 30 * time.Second

 		// We don't test it here, and it adds a dependency on kubectl
 		options.CreateKubecfg = false
--- a/cmd/kops/lifecycle_integration_test.go
+++ b/cmd/kops/lifecycle_integration_test.go
@ -144,7 +144,7 @@ func runLifecycleTest(h *testutils.IntegrationTestHarness, o *LifecycleTestOptio
 	{
 		options := &UpdateClusterOptions{}
 		options.InitDefaults()
-		options.MaxTaskDuration = 10 * time.Second
+		options.RunTasksOptions.MaxTaskDuration = 10 * time.Second
 		options.Yes = true

 		// We don't test it here, and it adds a dependency on kubectl
@ -160,7 +160,7 @@ func runLifecycleTest(h *testutils.IntegrationTestHarness, o *LifecycleTestOptio
 		options := &UpdateClusterOptions{}
 		options.InitDefaults()
 		options.Target = cloudup.TargetDryRun
-		options.MaxTaskDuration = 10 * time.Second
+		options.RunTasksOptions.MaxTaskDuration = 10 * time.Second

 		// We don't test it here, and it adds a dependency on kubectl
 		options.CreateKubecfg = false
--- a/cmd/kops/update_cluster.go
+++ b/cmd/kops/update_cluster.go
@ -23,7 +23,6 @@ import (
 	"io/ioutil"
 	"path/filepath"
 	"strings"
-	"time"

 	"github.com/golang/glog"
 	"github.com/spf13/cobra"
@ -63,7 +62,7 @@ type UpdateClusterOptions struct {
 	Models          string
 	OutDir          string
 	SSHPublicKey    string
-	MaxTaskDuration time.Duration
+	RunTasksOptions fi.RunTasksOptions
 	CreateKubecfg   bool

 	Phase string
@ -79,8 +78,8 @@ func (o *UpdateClusterOptions) InitDefaults() {
 	o.Models = strings.Join(cloudup.CloudupModels, ",")
 	o.SSHPublicKey = ""
 	o.OutDir = ""
-	o.MaxTaskDuration = cloudup.DefaultMaxTaskDuration
 	o.CreateKubecfg = true
+	o.RunTasksOptions.InitDefaults()
 }

 func NewCmdUpdateCluster(f *util.Factory, out io.Writer) *cobra.Command {
@ -246,7 +245,7 @@ func RunUpdateCluster(f *util.Factory, clusterName string, out io.Writer, c *Upd
 		Cluster:            cluster,
 		DryRun:             isDryrun,
 		InstanceGroups:     instanceGroups,
-		MaxTaskDuration:    c.MaxTaskDuration,
+		RunTasksOptions:    &c.RunTasksOptions,
 		Models:             strings.Split(c.Models, ","),
 		OutDir:             c.OutDir,
 		Phase:              phase,
--- a/cmd/nodeup/main.go
+++ b/cmd/nodeup/main.go
@ -104,11 +104,12 @@ func main() {
 				command = append(command, s)
 			}
 			i := bootstrap.Installation{
-				MaxTaskDuration: 5 * time.Minute,
-				CacheDir:        flagCacheDir,
-				Command:         command,
-				FSRoot:          flagRootFS,
+				CacheDir: flagCacheDir,
+				Command:  command,
+				FSRoot:   flagRootFS,
 			}
+			i.RunTasksOptions.InitDefaults()
+			i.RunTasksOptions.MaxTaskDuration = 5 * time.Minute
 			err = i.Run()
 			if err == nil {
 				fmt.Printf("service installed")
--- a/docs/releases/1.10-NOTES.md
+++ b/docs/releases/1.10-NOTES.md
@ -0,0 +1,18 @@
+## Release notes for kops 1.10 series
+
+# Significant changes
+
+* Old LaunchConfigurations are now deleted on AWS.  By default the 3 most recent LaunchConfigurations for each InstanceGroup are kept, and older ones are automatically removed.  To keep the existing behaviour, set the `KeepLaunchConfigurations` feature flag, i.e. `export KOPS_FEATURE_FLAGS=KeepLaunchConfigurations`.
+
+# Required Actions
+
+None known at this time
+
+# Highlighted changes
+
+(to follow)
+
+# Full change list
+
+(to follow)
+
--- a/nodeup/pkg/bootstrap/install.go
+++ b/nodeup/pkg/bootstrap/install.go
@ -21,7 +21,6 @@ import (
 	"fmt"
 	"os"
 	"strings"
-	"time"

 	"github.com/golang/glog"
 	"k8s.io/apimachinery/pkg/util/sets"
@ -36,7 +35,7 @@ import (
 type Installation struct {
 	FSRoot          string
 	CacheDir        string
-	MaxTaskDuration time.Duration
+	RunTasksOptions fi.RunTasksOptions
 	Command         []string
 }

@ -86,7 +85,7 @@ func (i *Installation) Run() error {
 	}
 	defer context.Close()

-	err = context.RunTasks(i.MaxTaskDuration)
+	err = context.RunTasks(i.RunTasksOptions)
 	if err != nil {
 		return fmt.Errorf("error running tasks: %v", err)
 	}
--- a/tests/keypair_test.go
+++ b/tests/keypair_test.go
@ -62,7 +62,8 @@ var _ fi.Target = &MockTarget{}
 func TestKeypairUpgrade(t *testing.T) {
 	lifecycle := fi.LifecycleSync

-	defaultDeadline := 2 * time.Second
+	runTasksOptions := fi.RunTasksOptions{}
+	runTasksOptions.MaxTaskDuration = 2 * time.Second

 	target := &MockTarget{}

@ -119,7 +120,7 @@ func TestKeypairUpgrade(t *testing.T) {
 			t.Fatalf("error building context: %v", err)
 		}

-		if err := context.RunTasks(defaultDeadline); err != nil {
+		if err := context.RunTasks(runTasksOptions); err != nil {
 			t.Fatalf("unexpected error during Run: %v", err)
 		}
 	}
@ -160,7 +161,7 @@ func TestKeypairUpgrade(t *testing.T) {
 			t.Fatalf("error building context: %v", err)
 		}

-		if err := context.RunTasks(defaultDeadline); err != nil {
+		if err := context.RunTasks(runTasksOptions); err != nil {
 			t.Fatalf("unexpected error during Run: %v", err)
 		}
 	}
@ -180,7 +181,7 @@ func TestKeypairUpgrade(t *testing.T) {
 			t.Fatalf("error building context: %v", err)
 		}

-		if err := context.RunTasks(defaultDeadline); err != nil {
+		if err := context.RunTasks(runTasksOptions); err != nil {
 			t.Fatalf("unexpected error during Run: %v", err)
 		}
 	}
--- a/upup/pkg/fi/cloudup/apply_cluster.go
+++ b/upup/pkg/fi/cloudup/apply_cluster.go
@ -22,7 +22,6 @@ import (
 	"os"
 	"path"
 	"strings"
-	"time"

 	"github.com/blang/semver"
 	"github.com/golang/glog"
@ -71,8 +70,7 @@ import (
 )

 const (
-	DefaultMaxTaskDuration = 10 * time.Minute
-	starline               = "*********************************************************************************\n"
+	starline = "*********************************************************************************\n"
 )

 var (
@ -124,7 +122,8 @@ type ApplyClusterCmd struct {
 	// DryRun is true if this is only a dry run
 	DryRun bool

-	MaxTaskDuration time.Duration
+	// RunTasksOptions defines parameters for task execution, e.g. retry interval
+	RunTasksOptions *fi.RunTasksOptions

 	// The channel we are using
 	channel *kops.Channel
@ -142,10 +141,6 @@ type ApplyClusterCmd struct {
 }

 func (c *ApplyClusterCmd) Run() error {
-	if c.MaxTaskDuration == 0 {
-		c.MaxTaskDuration = DefaultMaxTaskDuration
-	}
-
 	if c.InstanceGroups == nil {
 		list, err := c.Clientset.InstanceGroupsFor(c.Cluster).List(metav1.ListOptions{})
 		if err != nil {
@ -808,7 +803,14 @@ func (c *ApplyClusterCmd) Run() error {
 	}
 	defer context.Close()

-	err = context.RunTasks(c.MaxTaskDuration)
+	var options fi.RunTasksOptions
+	if c.RunTasksOptions != nil {
+		options = *c.RunTasksOptions
+	} else {
+		options.InitDefaults()
+	}
+
+	err = context.RunTasks(options)
 	if err != nil {
 		return fmt.Errorf("error running tasks: %v", err)
 	}
--- a/upup/pkg/fi/cloudup/awstasks/elastic_ip_test.go
+++ b/upup/pkg/fi/cloudup/awstasks/elastic_ip_test.go
@ -32,7 +32,10 @@ import (
 	"k8s.io/kops/upup/pkg/fi/cloudup/awsup"
 )

-const defaultDeadline = 2 * time.Second
+var testRunTasksOptions = fi.RunTasksOptions{
+	MaxTaskDuration:         2 * time.Second,
+	WaitAfterAllTasksFailed: 500 * time.Millisecond,
+}

 func TestElasticIPCreate(t *testing.T) {
 	cloud := awsup.BuildMockAWSCloud("us-east-1", "abc")
@ -77,7 +80,7 @@ func TestElasticIPCreate(t *testing.T) {
 			t.Fatalf("error building context: %v", err)
 		}

-		if err := context.RunTasks(defaultDeadline); err != nil {
+		if err := context.RunTasks(testRunTasksOptions); err != nil {
 			t.Fatalf("unexpected error during Run: %v", err)
 		}

@ -119,7 +122,7 @@ func checkNoChanges(t *testing.T, cloud fi.Cloud, allTasks map[string]fi.Task) {
 		t.Fatalf("error building context: %v", err)
 	}

-	if err := context.RunTasks(defaultDeadline); err != nil {
+	if err := context.RunTasks(testRunTasksOptions); err != nil {
 		t.Fatalf("unexpected error during Run: %v", err)
 	}

--- a/upup/pkg/fi/cloudup/awstasks/internetgateway_test.go
+++ b/upup/pkg/fi/cloudup/awstasks/internetgateway_test.go
@ -111,7 +111,7 @@ func TestSharedInternetGatewayDoesNotRename(t *testing.T) {
 			t.Fatalf("error building context: %v", err)
 		}

-		if err := context.RunTasks(defaultDeadline); err != nil {
+		if err := context.RunTasks(testRunTasksOptions); err != nil {
 			t.Fatalf("unexpected error during Run: %v", err)
 		}

--- a/upup/pkg/fi/cloudup/awstasks/launchconfiguration.go
+++ b/upup/pkg/fi/cloudup/awstasks/launchconfiguration.go
@ -39,8 +39,6 @@ import (
 // keep, we delete older ones
 var defaultRetainLaunchConfigurationCount = 3

-// TODO: Release note , including featureflag
-
 // RetainLaunchConfigurationCount returns the number of launch configurations to keep
 func RetainLaunchConfigurationCount() int {
 	if featureflag.KeepLaunchConfigurations.Enabled() {
--- a/upup/pkg/fi/cloudup/awstasks/launchconfiguration_test.go
+++ b/upup/pkg/fi/cloudup/awstasks/launchconfiguration_test.go
@ -19,7 +19,6 @@ package awstasks
 import (
 	"strconv"
 	"testing"
-
 	"time"

 	"github.com/aws/aws-sdk-go/aws"
@ -77,10 +76,16 @@ func TestLaunchConfigurationGarbageCollection(t *testing.T) {
 				t.Fatalf("error building context: %v", err)
 			}

-			time.Sleep(time.Second)
-			// TODO: Remove sleep, find out why we don't retry
-
-			if err := context.RunTasks(defaultDeadline); err != nil {
+			// We use a longer deadline because we know we often need to
+			// retry here, because we create different versions of
+			// launchconfigurations using the timestamp, but only to
+			// per-second granularity.  This normally works out because we
+			// retry for O(minutes), so after a few retries the clock has
+			// advanced.  But if we use too short a deadline in our tests we
+			// don't get this behaviour.
+			options := testRunTasksOptions
+			options.MaxTaskDuration = 5 * time.Second
+			if err := context.RunTasks(options); err != nil {
 				t.Fatalf("unexpected error during Run: %v", err)
 			}

--- a/upup/pkg/fi/cloudup/awstasks/securitygroup_test.go
+++ b/upup/pkg/fi/cloudup/awstasks/securitygroup_test.go
@ -132,7 +132,7 @@ func TestSecurityGroupCreate(t *testing.T) {
 			t.Fatalf("error building context: %v", err)
 		}

-		if err := context.RunTasks(defaultDeadline); err != nil {
+		if err := context.RunTasks(testRunTasksOptions); err != nil {
 			t.Fatalf("unexpected error during Run: %v", err)
 		}

--- a/upup/pkg/fi/cloudup/awstasks/subnet_test.go
+++ b/upup/pkg/fi/cloudup/awstasks/subnet_test.go
@ -101,7 +101,7 @@ func TestSubnetCreate(t *testing.T) {
 			t.Fatalf("error building context: %v", err)
 		}

-		if err := context.RunTasks(defaultDeadline); err != nil {
+		if err := context.RunTasks(testRunTasksOptions); err != nil {
 			t.Fatalf("unexpected error during Run: %v", err)
 		}

@ -219,7 +219,7 @@ func TestSharedSubnetCreateDoesNotCreateNew(t *testing.T) {
 			t.Fatalf("error building context: %v", err)
 		}

-		if err := context.RunTasks(defaultDeadline); err != nil {
+		if err := context.RunTasks(testRunTasksOptions); err != nil {
 			t.Fatalf("unexpected error during Run: %v", err)
 		}

--- a/upup/pkg/fi/cloudup/awstasks/vpc_test.go
+++ b/upup/pkg/fi/cloudup/awstasks/vpc_test.go
@ -57,7 +57,7 @@ func TestVPCCreate(t *testing.T) {
 			t.Fatalf("error building context: %v", err)
 		}

-		if err := context.RunTasks(defaultDeadline); err != nil {
+		if err := context.RunTasks(testRunTasksOptions); err != nil {
 			t.Fatalf("unexpected error during Run: %v", err)
 		}

--- a/upup/pkg/fi/context.go
+++ b/upup/pkg/fi/context.go
@ -23,7 +23,6 @@ import (
 	"os"
 	"reflect"
 	"strings"
-	"time"

 	"github.com/golang/glog"
 	"k8s.io/kops/dnsprovider/pkg/dnsprovider"
@ -80,11 +79,12 @@ func (c *Context) AllTasks() map[string]Task {
 	return c.tasks
 }

-func (c *Context) RunTasks(maxTaskDuration time.Duration) error {
+func (c *Context) RunTasks(options RunTasksOptions) error {
 	e := &executor{
 		context: c,
+		options: options,
 	}
-	return e.RunTasks(c.tasks, maxTaskDuration)
+	return e.RunTasks(c.tasks)
 }

 func (c *Context) Close() {
--- a/upup/pkg/fi/executor.go
+++ b/upup/pkg/fi/executor.go
@ -27,6 +27,8 @@ import (

 type executor struct {
 	context *Context
+
+	options RunTasksOptions
 }

 type taskState struct {
@ -38,9 +40,19 @@ type taskState struct {
 	dependencies []*taskState
 }

+type RunTasksOptions struct {
+	MaxTaskDuration         time.Duration
+	WaitAfterAllTasksFailed time.Duration
+}
+
+func (o *RunTasksOptions) InitDefaults() {
+	o.MaxTaskDuration = 10 * time.Minute
+	o.WaitAfterAllTasksFailed = 10 * time.Second
+}
+
 // RunTasks executes all the tasks, considering their dependencies
 // It will perform some re-execution on error, retrying as long as progress is still being made
-func (e *executor) RunTasks(taskMap map[string]Task, maxTaskDuration time.Duration) error {
+func (e *executor) RunTasks(taskMap map[string]Task) error {
 	dependencies := FindTaskDependencies(taskMap)

 	taskStates := make(map[string]*taskState)
@ -80,7 +92,7 @@ func (e *executor) RunTasks(taskMap map[string]Task, maxTaskDuration time.Durati
 			}
 			if ready {
 				if ts.deadline.IsZero() {
-					ts.deadline = time.Now().Add(maxTaskDuration)
+					ts.deadline = time.Now().Add(e.options.MaxTaskDuration)
 				} else if time.Now().After(ts.deadline) {
 					return fmt.Errorf("deadline exceeded executing task %v. Example error: %v", ts.key, ts.lastError)
 				}
@ -131,7 +143,7 @@ func (e *executor) RunTasks(taskMap map[string]Task, maxTaskDuration time.Durati
 				panic("did not make progress executing tasks; but no errors reported")
 			}
 			glog.Infof("No progress made, sleeping before retrying %d failed task(s)", len(errors))
-			time.Sleep(10 * time.Second)
+			time.Sleep(e.options.WaitAfterAllTasksFailed)
 		}
 	}

--- a/upup/pkg/fi/nodeup/command.go
+++ b/upup/pkg/fi/nodeup/command.go
@ -291,7 +291,10 @@ func (c *NodeUpCommand) Run(out io.Writer) error {
 	}
 	defer context.Close()

-	err = context.RunTasks(MaxTaskDuration)
+	var options fi.RunTasksOptions
+	options.InitDefaults()
+
+	err = context.RunTasks(options)
 	if err != nil {
 		glog.Exitf("error running tasks: %v", err)
 	}