fix: retain objects which failed to reconcile

This change updates the set-inventory logic to retain objects which
failed to reconcile. This ensures that if you run the applier/destroyer
multiple times, an object that is failing to reconcile will be retained
in the inventory. Before this change, an object failing to reconcile
could be lost after multiple attempts (e.g. multiple destroys).
This commit is contained in:
Sam Dowell 2023-07-17 13:56:04 -07:00
parent 833d70fad2
commit cd911bfaa3
4 changed files with 102 additions and 41 deletions

View File

@ -8,7 +8,6 @@ import (
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime/schema"
"sigs.k8s.io/cli-utils/pkg/apis/actuation"
"sigs.k8s.io/cli-utils/pkg/apply/cache"
"sigs.k8s.io/cli-utils/pkg/apply/event"
"sigs.k8s.io/cli-utils/pkg/apply/taskrunner"
@ -24,6 +23,7 @@ func TestDeleteInvTask(t *testing.T) {
id3 := object.UnstructuredToObjMetadata(obj3)
testCases := map[string]struct {
prevInventory object.ObjMetadataSet
deletedObjs object.ObjMetadataSet
failedDeletes object.ObjMetadataSet
failedReconciles object.ObjMetadataSet
timeoutReconciles object.ObjMetadataSet
@ -54,6 +54,7 @@ func TestDeleteInvTask(t *testing.T) {
},
"inventory is updated instead of deleted in case of reconcile failure": {
prevInventory: object.ObjMetadataSet{id1, id2, id3},
deletedObjs: object.ObjMetadataSet{id1, id2, id3},
failedReconciles: object.ObjMetadataSet{id1},
err: nil,
isError: false,
@ -61,6 +62,7 @@ func TestDeleteInvTask(t *testing.T) {
},
"inventory is updated instead of deleted in case of reconcile timeout": {
prevInventory: object.ObjMetadataSet{id1, id2, id3},
deletedObjs: object.ObjMetadataSet{id1, id2, id3},
timeoutReconciles: object.ObjMetadataSet{id1},
err: nil,
isError: false,
@ -68,6 +70,7 @@ func TestDeleteInvTask(t *testing.T) {
},
"inventory is updated instead of deleted in case of pruning/reconcile failure": {
prevInventory: object.ObjMetadataSet{id1, id2, id3},
deletedObjs: object.ObjMetadataSet{id1, id2, id3},
failedReconciles: object.ObjMetadataSet{id1},
failedDeletes: object.ObjMetadataSet{id2},
err: nil,
@ -82,34 +85,20 @@ func TestDeleteInvTask(t *testing.T) {
eventChannel := make(chan event.Event)
resourceCache := cache.NewResourceCacheMap()
context := taskrunner.NewTaskContext(eventChannel, resourceCache)
im := context.InventoryManager()
for _, deleteObj := range tc.deletedObjs {
im.AddSuccessfulDelete(deleteObj, "unused-uid")
}
for _, failedDelete := range tc.failedDeletes {
context.InventoryManager().AddFailedDelete(failedDelete)
im.AddFailedDelete(failedDelete)
}
for _, failedReconcile := range tc.failedReconciles {
context.InventoryManager().SetObjectStatus(actuation.ObjectStatus{
ObjectReference: actuation.ObjectReference{
Group: failedReconcile.GroupKind.Group,
Kind: failedReconcile.GroupKind.Kind,
Name: failedReconcile.Name,
Namespace: failedReconcile.Namespace,
},
})
context.AddInvalidObject(failedReconcile)
if err := context.InventoryManager().SetFailedReconcile(failedReconcile); err != nil {
if err := im.SetFailedReconcile(failedReconcile); err != nil {
t.Fatal(err)
}
}
for _, timeoutReconcile := range tc.timeoutReconciles {
context.InventoryManager().SetObjectStatus(actuation.ObjectStatus{
ObjectReference: actuation.ObjectReference{
Group: timeoutReconcile.GroupKind.Group,
Kind: timeoutReconcile.GroupKind.Kind,
Name: timeoutReconcile.Name,
Namespace: timeoutReconcile.Namespace,
},
})
context.AddInvalidObject(timeoutReconcile)
if err := context.InventoryManager().SetTimeoutReconcile(timeoutReconcile); err != nil {
if err := im.SetTimeoutReconcile(timeoutReconcile); err != nil {
t.Fatal(err)
}
}

View File

@ -131,6 +131,18 @@ func (i *DeleteOrUpdateInvTask) updateInventory(taskContext *taskrunner.TaskCont
klog.V(4).Infof("keep in inventory %d skipped prunes", len(pruneSkips))
invObjs = invObjs.Union(pruneSkips)
// If an object failed to reconcile and was previously stored in the inventory,
// then keep it in the inventory so it can be waited on next time.
reconcileFailures := i.PrevInventory.Intersection(im.FailedReconciles())
klog.V(4).Infof("set inventory %d failed reconciles", len(reconcileFailures))
invObjs = invObjs.Union(reconcileFailures)
// If an object timed out reconciling and was previously stored in the inventory,
// then keep it in the inventory so it can be waited on next time.
reconcileTimeouts := i.PrevInventory.Intersection(im.TimeoutReconciles())
klog.V(4).Infof("keep in inventory %d timeout reconciles", len(reconcileTimeouts))
invObjs = invObjs.Union(reconcileTimeouts)
// If an object is abandoned, then remove it from the inventory.
abandonedObjects := taskContext.AbandonedObjects()
klog.V(4).Infof("remove from inventory %d abandoned objects", len(abandonedObjects))

View File

@ -29,15 +29,18 @@ func TestInvSetTask(t *testing.T) {
idInvalid := object.UnstructuredToObjMetadata(objInvalid)
tests := map[string]struct {
prevInventory object.ObjMetadataSet
appliedObjs object.ObjMetadataSet
failedApplies object.ObjMetadataSet
failedDeletes object.ObjMetadataSet
skippedApplies object.ObjMetadataSet
skippedDeletes object.ObjMetadataSet
abandonedObjs object.ObjMetadataSet
invalidObjs object.ObjMetadataSet
expectedObjs object.ObjMetadataSet
prevInventory object.ObjMetadataSet
appliedObjs object.ObjMetadataSet
deletedObjs object.ObjMetadataSet
failedApplies object.ObjMetadataSet
failedDeletes object.ObjMetadataSet
skippedApplies object.ObjMetadataSet
skippedDeletes object.ObjMetadataSet
failedReconciles object.ObjMetadataSet
timeoutReconciles object.ObjMetadataSet
abandonedObjs object.ObjMetadataSet
invalidObjs object.ObjMetadataSet
expectedObjs object.ObjMetadataSet
}{
"no apply objs, no prune failures; no inventory": {
expectedObjs: object.ObjMetadataSet{},
@ -160,6 +163,30 @@ func TestInvSetTask(t *testing.T) {
invalidObjs: object.ObjMetadataSet{idInvalid},
expectedObjs: object.ObjMetadataSet{id3},
},
"applied object failed to reconcile": {
prevInventory: object.ObjMetadataSet{},
appliedObjs: object.ObjMetadataSet{id3},
failedReconciles: object.ObjMetadataSet{id3},
expectedObjs: object.ObjMetadataSet{id3},
},
"deleted object failed to reconcile": {
prevInventory: object.ObjMetadataSet{id3},
deletedObjs: object.ObjMetadataSet{id3},
failedReconciles: object.ObjMetadataSet{id3},
expectedObjs: object.ObjMetadataSet{id3},
},
"applied object timed out to reconcile": {
prevInventory: object.ObjMetadataSet{},
appliedObjs: object.ObjMetadataSet{id3},
timeoutReconciles: object.ObjMetadataSet{id3},
expectedObjs: object.ObjMetadataSet{id3},
},
"deleted object timed out to reconcile": {
prevInventory: object.ObjMetadataSet{id3},
deletedObjs: object.ObjMetadataSet{id3},
timeoutReconciles: object.ObjMetadataSet{id3},
expectedObjs: object.ObjMetadataSet{id3},
},
}
for name, tc := range tests {
@ -180,6 +207,9 @@ func TestInvSetTask(t *testing.T) {
for _, applyObj := range tc.appliedObjs {
im.AddSuccessfulApply(applyObj, "unusued-uid", int64(0))
}
for _, deleteObj := range tc.deletedObjs {
im.AddSuccessfulDelete(deleteObj, "unused-uid")
}
for _, applyFailure := range tc.failedApplies {
im.AddFailedApply(applyFailure)
}
@ -198,6 +228,16 @@ func TestInvSetTask(t *testing.T) {
for _, invalidObj := range tc.invalidObjs {
context.AddInvalidObject(invalidObj)
}
for _, failedReconcile := range tc.failedReconciles {
if err := im.SetFailedReconcile(failedReconcile); err != nil {
t.Fatal(err)
}
}
for _, timeoutReconcile := range tc.timeoutReconciles {
if err := im.SetTimeoutReconcile(timeoutReconcile); err != nil {
t.Fatal(err)
}
}
if taskName != task.Name() {
t.Errorf("expected task name (%s), got (%s)", taskName, task.Name())
}

View File

@ -61,20 +61,40 @@ func destroyReconciliationFailureTest(ctx context.Context, c client.Client, invC
options := apply.DestroyerOptions{
InventoryPolicy: inventory.PolicyAdoptIfNoInventory,
DeleteTimeout: 15 * time.Second, // one pod is expected to be not pruned, so set a shorter timeout
DeleteTimeout: 10 * time.Second, // one pod is expected to be not pruned, so set a short timeout
}
_ = e2eutil.RunCollect(destroyer.Run(ctx, inv, options))
// we should be able to run destroy multiple times and continue tracking the
// object in the inventory
expectedCounts := []struct {
specCount int
statusCount int
}{
{
specCount: 1, // one object failed to reconcile, so is retained
statusCount: 2, // status for two objects, one deleted successfully
},
{
specCount: 1,
statusCount: 1, // only one object in inventory now, still failing to reconcile
},
{
specCount: 1,
statusCount: 1,
},
}
for _, ec := range expectedCounts {
_ = e2eutil.RunCollect(destroyer.Run(ctx, inv, options))
By("Verify pod1 is deleted")
e2eutil.AssertUnstructuredDoesNotExist(ctx, c, podObject)
By("Verify pod1 is deleted")
e2eutil.AssertUnstructuredDoesNotExist(ctx, c, podObject)
By("Verify podWithFinalizerObject is not deleted but has deletion timestamp")
podWithFinalizerObject = e2eutil.AssertHasDeletionTimestamp(ctx, c, podWithFinalizerObject)
By("Verify inventory")
// The inventory should still have the Pod with the finalizer.
invConfig.InvSizeVerifyFunc(ctx, c, inventoryName, namespaceName, inventoryID, 0, 2)
By("Verify podWithFinalizerObject is not deleted but has deletion timestamp")
podWithFinalizerObject = e2eutil.AssertHasDeletionTimestamp(ctx, c, podWithFinalizerObject)
By("Verify inventory")
// The inventory should still have the Pod with the finalizer.
invConfig.InvSizeVerifyFunc(ctx, c, inventoryName, namespaceName, inventoryID, ec.specCount, ec.statusCount)
}
// remove the finalizer
podWithFinalizerObject = e2eutil.WithoutFinalizers(podWithFinalizerObject)
e2eutil.ApplyUnstructured(ctx, c, podWithFinalizerObject)