Merge pull request #15301 from infonova/os-rework-retry-failed-servers

OpenStack: Use task engine to retry failed servers
This commit is contained in:
Kubernetes Prow Robot 2023-05-22 14:34:34 -07:00 committed by GitHub
commit b78f1fab3a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 63 additions and 6 deletions

View File

@ -71,6 +71,7 @@ ServerGroup:
Name: cluster-node
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""

View File

@ -70,6 +70,7 @@ ServerGroup:
Name: cluster-node
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""

View File

@ -70,6 +70,7 @@ ServerGroup:
Name: cluster-node
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""

View File

@ -72,6 +72,7 @@ ServerGroup:
Name: cluster-node
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""

View File

@ -69,6 +69,7 @@ ServerGroup:
Name: cluster-node
Policies:
- soft-anti-affinity
Status: null
UserData:
task:
Lifecycle: ""

View File

@ -129,6 +129,7 @@ ServerGroup:
Name: cluster-master
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -217,6 +218,7 @@ ServerGroup:
Name: cluster-master
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -305,6 +307,7 @@ ServerGroup:
Name: cluster-master
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -384,6 +387,7 @@ ServerGroup:
Name: cluster-node
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -463,6 +467,7 @@ ServerGroup:
Name: cluster-node
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -542,6 +547,7 @@ ServerGroup:
Name: cluster-node
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""

View File

@ -109,6 +109,7 @@ ServerGroup:
Name: cluster-master-a
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -185,6 +186,7 @@ ServerGroup:
Name: cluster-master-b
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -261,6 +263,7 @@ ServerGroup:
Name: cluster-master-c
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -334,6 +337,7 @@ ServerGroup:
Name: cluster-node-a
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -407,6 +411,7 @@ ServerGroup:
Name: cluster-node-b
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -480,6 +485,7 @@ ServerGroup:
Name: cluster-node-c
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""

View File

@ -109,6 +109,7 @@ ServerGroup:
Name: cluster-master-a
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -185,6 +186,7 @@ ServerGroup:
Name: cluster-master-b
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -261,6 +263,7 @@ ServerGroup:
Name: cluster-master-c
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -334,6 +337,7 @@ ServerGroup:
Name: cluster-node-a
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -407,6 +411,7 @@ ServerGroup:
Name: cluster-node-b
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -480,6 +485,7 @@ ServerGroup:
Name: cluster-node-c
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""

View File

@ -141,6 +141,7 @@ ServerGroup:
Name: cluster-master-a
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -229,6 +230,7 @@ ServerGroup:
Name: cluster-master-b
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -317,6 +319,7 @@ ServerGroup:
Name: cluster-master-c
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -396,6 +399,7 @@ ServerGroup:
Name: cluster-node-a
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -475,6 +479,7 @@ ServerGroup:
Name: cluster-node-b
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -554,6 +559,7 @@ ServerGroup:
Name: cluster-node-c
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""

View File

@ -93,6 +93,7 @@ ServerGroup:
Name: cluster-master-a
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -175,6 +176,7 @@ ServerGroup:
Name: cluster-master-b
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -257,6 +259,7 @@ ServerGroup:
Name: cluster-master-c
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -330,6 +333,7 @@ ServerGroup:
Name: cluster-node-a
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -403,6 +407,7 @@ ServerGroup:
Name: cluster-node-b
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -476,6 +481,7 @@ ServerGroup:
Name: cluster-node-c
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""

View File

@ -74,6 +74,7 @@ ServerGroup:
Name: cluster-bastion
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -156,6 +157,7 @@ ServerGroup:
Name: cluster-master
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -229,6 +231,7 @@ ServerGroup:
Name: cluster-node
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""

View File

@ -87,6 +87,7 @@ ServerGroup:
Name: cluster-bastion
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -169,6 +170,7 @@ ServerGroup:
Name: cluster-master
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -242,6 +244,7 @@ ServerGroup:
Name: cluster-node
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""

View File

@ -81,6 +81,7 @@ ServerGroup:
Name: cluster-master
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -154,6 +155,7 @@ ServerGroup:
Name: cluster-node
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""

View File

@ -101,6 +101,7 @@ ServerGroup:
Name: cluster-master
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -180,6 +181,7 @@ ServerGroup:
Name: cluster-node
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""

View File

@ -101,6 +101,7 @@ ServerGroup:
Name: tom-software-dev-playground-real33-k8s-local-master
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""
@ -180,6 +181,7 @@ ServerGroup:
Name: tom-software-dev-playground-real33-k8s-local-node
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""

View File

@ -71,6 +71,7 @@ ServerGroup:
Name: cluster-node
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""

View File

@ -71,6 +71,7 @@ ServerGroup:
Name: cluster-node
Policies:
- anti-affinity
Status: null
UserData:
task:
Lifecycle: ""

View File

@ -101,10 +101,6 @@ func createInstance(c OpenstackCloud, opt servers.CreateOptsBuilder, portID stri
var server *servers.Server
done, err := vfs.RetryWithBackoff(writeBackoff, func() (bool, error) {
if server != nil {
// Note: this will delete the server from the last try, even if it is now ACTIVE or still in BUILD state
c.DeleteInstanceWithID(server.ID)
}
v, err := servers.Create(c.ComputeClient(), opt).Extract()
if err != nil {
@ -131,7 +127,7 @@ func createInstance(c OpenstackCloud, opt servers.CreateOptsBuilder, portID stri
err = waitForStatusActive(c, server.ID, nil)
if err != nil {
return false, fmt.Errorf("error while waiting for server '%s' to become '%s': %v", server.ID, activeStatus, err)
return true, err
}
return true, nil

View File

@ -52,6 +52,7 @@ type Instance struct {
SecurityGroups []string
FloatingIP *FloatingIP
ConfigDrive *bool
Status *string
Lifecycle fi.Lifecycle
ForAPIServer bool
@ -198,6 +199,7 @@ func (e *Instance) Find(c *fi.CloudupContext) (*Instance, error) {
AvailabilityZone: e.AvailabilityZone,
GroupName: e.GroupName,
ConfigDrive: e.ConfigDrive,
Status: fi.PtrTo(server.Status),
}
ports, err := cloud.ListPorts(ports.ListOpts{
@ -244,6 +246,7 @@ func (e *Instance) Find(c *fi.CloudupContext) (*Instance, error) {
// Avoid flapping
e.ID = actual.ID
e.Status = fi.PtrTo(activeStatus)
actual.ForAPIServer = e.ForAPIServer
// Immutable fields
@ -281,6 +284,9 @@ func (_ *Instance) ShouldCreate(a, e, changes *Instance) (bool, error) {
if a == nil {
return true, nil
}
if fi.ValueOf(a.Status) == errorStatus {
return true, nil
}
if changes.Port != nil {
return true, nil
}
@ -309,7 +315,13 @@ func generateInstanceName(e *Instance) (string, error) {
func (_ *Instance) RenderOpenstack(t *openstack.OpenstackAPITarget, a, e, changes *Instance) error {
cloud := t.Cloud
if a == nil {
if a != nil && fi.ValueOf(a.Status) == errorStatus {
klog.V(2).Infof("Delete previously failed server: %s\n", fi.ValueOf(a.ID))
cloud.DeleteInstanceWithID(fi.ValueOf(a.ID))
}
if a == nil || fi.ValueOf(a.Status) == errorStatus {
serverName, err := generateInstanceName(e)
if err != nil {
return err