OpenStack: Use task engine to retry failed servers

This will make use of the kOps taks engine to retry failed servers.

The former approach had the side effect of not making kOps fail when the
last retry failed:

Because there is now a server present - although in an erroneous state -
the "instance task" which the task engine retried reconciled the server
(port, floating ip) instead of recreating it again.

With the approach of letting the task engine retry the failed servers
this will be handled correctly.
This commit is contained in:
ederst 2023-04-06 22:33:28 +02:00
parent c3f28b25d2
commit 65dd16a301
2 changed files with 14 additions and 6 deletions

View File

@ -101,10 +101,6 @@ func createInstance(c OpenstackCloud, opt servers.CreateOptsBuilder, portID stri
var server *servers.Server
done, err := vfs.RetryWithBackoff(writeBackoff, func() (bool, error) {
if server != nil {
// Note: this will delete the server from the last try, even if it is now ACTIVE or still in BUILD state
c.DeleteInstanceWithID(server.ID)
}
v, err := servers.Create(c.ComputeClient(), opt).Extract()
if err != nil {
@ -131,7 +127,7 @@ func createInstance(c OpenstackCloud, opt servers.CreateOptsBuilder, portID stri
err = waitForStatusActive(c, server.ID, nil)
if err != nil {
return false, fmt.Errorf("error while waiting for server '%s' to become '%s': %v", server.ID, activeStatus, err)
return true, err
}
return true, nil

View File

@ -52,6 +52,7 @@ type Instance struct {
SecurityGroups []string
FloatingIP *FloatingIP
ConfigDrive *bool
Status *string
Lifecycle fi.Lifecycle
ForAPIServer bool
@ -198,6 +199,7 @@ func (e *Instance) Find(c *fi.CloudupContext) (*Instance, error) {
AvailabilityZone: e.AvailabilityZone,
GroupName: e.GroupName,
ConfigDrive: e.ConfigDrive,
Status: fi.PtrTo(server.Status),
}
ports, err := cloud.ListPorts(ports.ListOpts{
@ -244,6 +246,7 @@ func (e *Instance) Find(c *fi.CloudupContext) (*Instance, error) {
// Avoid flapping
e.ID = actual.ID
e.Status = fi.PtrTo(activeStatus)
actual.ForAPIServer = e.ForAPIServer
// Immutable fields
@ -281,6 +284,9 @@ func (_ *Instance) ShouldCreate(a, e, changes *Instance) (bool, error) {
if a == nil {
return true, nil
}
if fi.ValueOf(a.Status) == errorStatus {
return true, nil
}
if changes.Port != nil {
return true, nil
}
@ -309,7 +315,13 @@ func generateInstanceName(e *Instance) (string, error) {
func (_ *Instance) RenderOpenstack(t *openstack.OpenstackAPITarget, a, e, changes *Instance) error {
cloud := t.Cloud
if a == nil {
if a != nil && fi.ValueOf(a.Status) == errorStatus {
klog.V(2).Infof("Delete previously failed server: %s\n", fi.ValueOf(a.ID))
cloud.DeleteInstanceWithID(fi.ValueOf(a.ID))
}
if a == nil || fi.ValueOf(a.Status) == errorStatus {
serverName, err := generateInstanceName(e)
if err != nil {
return err