fix: make deletion idempotent

- Don't error when a delete returns NotFound.
  This means the object was deleted asynchronously by another client
  between the initial GET for task scheduling and the delete task itself.
  Log a warning, because it's possible the deletion may have violated
  dependency ordering.
- Update apply and delte task logs to only log errors if the verbosity
  is greater than -v=4. This should help reduce duplicate log messages.
This commit is contained in:
Karl Isenberg 2022-06-07 15:39:35 -07:00
parent 0cb95eeda6
commit 45607b8abb
3 changed files with 85 additions and 39 deletions

View File

@ -103,6 +103,10 @@ func (p *Pruner) Prune(
uid := obj.GetUID()
if uid == "" {
err := object.NotFound([]interface{}{"metadata", "uid"}, "")
if klog.V(4).Enabled() {
// only log event emitted errors if the verbosity > 4
klog.Errorf("prune uid lookup errored (object: %s): %v", id, err)
}
taskContext.SendEvent(eventFactory.CreateFailedEvent(id, err))
taskContext.InventoryManager().AddFailedDelete(id)
continue
@ -116,7 +120,8 @@ func (p *Pruner) Prune(
if filterErr != nil {
var fatalErr *filter.FatalError
if errors.As(filterErr, &fatalErr) {
if klog.V(5).Enabled() {
if klog.V(4).Enabled() {
// only log event emitted errors if the verbosity > 4
klog.Errorf("prune filter errored (filter: %s, object: %s): %v", pruneFilter.Name(), id, fatalErr.Err)
}
taskContext.SendEvent(eventFactory.CreateFailedEvent(id, fatalErr.Err))
@ -134,6 +139,7 @@ func (p *Pruner) Prune(
obj, err = p.removeInventoryAnnotation(obj)
if err != nil {
if klog.V(4).Enabled() {
// only log event emitted errors if the verbosity > 4
klog.Errorf("error removing annotation (object: %q, annotation: %q): %v", id, inventory.OwningInventoryKey, err)
}
taskContext.SendEvent(eventFactory.CreateFailedEvent(id, err))
@ -167,12 +173,18 @@ func (p *Pruner) Prune(
PropagationPolicy: &opts.PropagationPolicy,
})
if err != nil {
if klog.V(4).Enabled() {
klog.Errorf("error deleting object (object: %q): %v", id, err)
if apierrors.IsNotFound(err) {
klog.Warningf("error deleting object (object: %q): object not found: object may have been deleted asynchronously by another client", id)
// treat this as successful idempotent deletion
} else {
if klog.V(4).Enabled() {
// only log event emitted errors if the verbosity > 4
klog.Errorf("error deleting object (object: %q): %v", id, err)
}
taskContext.SendEvent(eventFactory.CreateFailedEvent(id, err))
taskContext.InventoryManager().AddFailedDelete(id)
continue
}
taskContext.SendEvent(eventFactory.CreateFailedEvent(id, err))
taskContext.InventoryManager().AddFailedDelete(id)
continue
}
}
taskContext.InventoryManager().AddSuccessfulDelete(id, obj.GetUID())

View File

@ -184,6 +184,7 @@ var (
func TestPrune(t *testing.T) {
tests := map[string]struct {
clusterObjs []*unstructured.Unstructured
pruneObjs []*unstructured.Unstructured
pruneFilters []filter.ValidationFilter
options Options
@ -193,13 +194,15 @@ func TestPrune(t *testing.T) {
expectedAbandoned object.ObjMetadataSet
}{
"No pruned objects; no prune/delete events": {
clusterObjs: []*unstructured.Unstructured{},
pruneObjs: []*unstructured.Unstructured{},
options: defaultOptions,
expectedEvents: nil,
},
"One successfully pruned object": {
pruneObjs: []*unstructured.Unstructured{pod},
options: defaultOptions,
clusterObjs: []*unstructured.Unstructured{pod},
pruneObjs: []*unstructured.Unstructured{pod},
options: defaultOptions,
expectedEvents: []event.Event{
{
Type: event.PruneType,
@ -212,8 +215,9 @@ func TestPrune(t *testing.T) {
},
},
"Multiple successfully pruned object": {
pruneObjs: []*unstructured.Unstructured{pod, pdb, namespace},
options: defaultOptions,
clusterObjs: []*unstructured.Unstructured{pod, pdb, namespace},
pruneObjs: []*unstructured.Unstructured{pod, pdb, namespace},
options: defaultOptions,
expectedEvents: []event.Event{
{
Type: event.PruneType,
@ -242,8 +246,9 @@ func TestPrune(t *testing.T) {
},
},
"One successfully deleted object": {
pruneObjs: []*unstructured.Unstructured{pod},
options: defaultOptionsDestroy,
clusterObjs: []*unstructured.Unstructured{pod},
pruneObjs: []*unstructured.Unstructured{pod},
options: defaultOptionsDestroy,
expectedEvents: []event.Event{
{
Type: event.DeleteType,
@ -256,8 +261,9 @@ func TestPrune(t *testing.T) {
},
},
"Multiple successfully deleted objects": {
pruneObjs: []*unstructured.Unstructured{pod, pdb, namespace},
options: defaultOptionsDestroy,
clusterObjs: []*unstructured.Unstructured{pod, pdb, namespace},
pruneObjs: []*unstructured.Unstructured{pod, pdb, namespace},
options: defaultOptionsDestroy,
expectedEvents: []event.Event{
{
Type: event.DeleteType,
@ -286,8 +292,9 @@ func TestPrune(t *testing.T) {
},
},
"Client dry run still pruned event": {
pruneObjs: []*unstructured.Unstructured{pod},
options: clientDryRunOptions,
clusterObjs: []*unstructured.Unstructured{pod},
pruneObjs: []*unstructured.Unstructured{pod},
options: clientDryRunOptions,
expectedEvents: []event.Event{
{
Type: event.PruneType,
@ -300,7 +307,8 @@ func TestPrune(t *testing.T) {
},
},
"Server dry run still deleted event": {
pruneObjs: []*unstructured.Unstructured{pod},
clusterObjs: []*unstructured.Unstructured{pod},
pruneObjs: []*unstructured.Unstructured{pod},
options: Options{
DryRunStrategy: common.DryRunServer,
PropagationPolicy: metav1.DeletePropagationBackground,
@ -318,7 +326,8 @@ func TestPrune(t *testing.T) {
},
},
"UID match means prune skipped": {
pruneObjs: []*unstructured.Unstructured{pod},
clusterObjs: []*unstructured.Unstructured{pod},
pruneObjs: []*unstructured.Unstructured{pod},
pruneFilters: []filter.ValidationFilter{
filter.CurrentUIDFilter{
// Add pod UID to set of current UIDs
@ -344,7 +353,8 @@ func TestPrune(t *testing.T) {
},
},
"UID match for only one object one pruned, one skipped": {
pruneObjs: []*unstructured.Unstructured{pod, pdb},
clusterObjs: []*unstructured.Unstructured{pod, pdb},
pruneObjs: []*unstructured.Unstructured{pod, pdb},
pruneFilters: []filter.ValidationFilter{
filter.CurrentUIDFilter{
// Add pod UID to set of current UIDs
@ -378,6 +388,10 @@ func TestPrune(t *testing.T) {
},
},
"Prevent delete annotation equals prune skipped": {
clusterObjs: []*unstructured.Unstructured{
podDeletionPrevention,
testutil.Unstructured(t, pdbDeletePreventionManifest),
},
pruneObjs: []*unstructured.Unstructured{
podDeletionPrevention,
testutil.Unstructured(t, pdbDeletePreventionManifest),
@ -422,6 +436,10 @@ func TestPrune(t *testing.T) {
},
},
"Prevent delete annotation equals delete skipped": {
clusterObjs: []*unstructured.Unstructured{
podDeletionPrevention,
testutil.Unstructured(t, pdbDeletePreventionManifest),
},
pruneObjs: []*unstructured.Unstructured{
podDeletionPrevention,
testutil.Unstructured(t, pdbDeletePreventionManifest),
@ -466,6 +484,7 @@ func TestPrune(t *testing.T) {
},
},
"Prevent delete annotation, one skipped, one pruned": {
clusterObjs: []*unstructured.Unstructured{podDeletionPrevention, pod},
pruneObjs: []*unstructured.Unstructured{podDeletionPrevention, pod},
pruneFilters: []filter.ValidationFilter{filter.PreventRemoveFilter{}},
options: defaultOptions,
@ -500,7 +519,8 @@ func TestPrune(t *testing.T) {
},
},
"Namespace prune skipped": {
pruneObjs: []*unstructured.Unstructured{namespace},
clusterObjs: []*unstructured.Unstructured{namespace},
pruneObjs: []*unstructured.Unstructured{namespace},
pruneFilters: []filter.ValidationFilter{
filter.LocalNamespacesFilter{
LocalNamespaces: sets.NewString(namespace.GetName()),
@ -524,19 +544,34 @@ func TestPrune(t *testing.T) {
object.UnstructuredToObjMetadata(namespace),
},
},
"Deletion of already deleted object": {
clusterObjs: []*unstructured.Unstructured{},
pruneObjs: []*unstructured.Unstructured{pod},
options: defaultOptionsDestroy,
expectedEvents: []event.Event{
{
Type: event.DeleteType,
DeleteEvent: event.DeleteEvent{
Identifier: object.UnstructuredToObjMetadata(pod),
Status: event.DeleteSuccessful,
Object: pod,
},
},
},
},
}
for name, tc := range tests {
t.Run(name, func(t *testing.T) {
// Set up the fake dynamic client to recognize all objects, and the RESTMapper.
objs := make([]runtime.Object, 0, len(tc.pruneObjs))
for _, obj := range tc.pruneObjs {
objs = append(objs, obj)
clusterObjs := make([]runtime.Object, 0, len(tc.clusterObjs))
for _, obj := range tc.clusterObjs {
clusterObjs = append(clusterObjs, obj)
}
pruneIds := object.UnstructuredSetToObjMetadataSet(tc.pruneObjs)
po := Pruner{
InvClient: inventory.NewFakeClient(pruneIds),
Client: fake.NewSimpleDynamicClient(scheme.Scheme, objs...),
Client: fake.NewSimpleDynamicClient(scheme.Scheme, clusterObjs...),
Mapper: testrestmapper.TestOnlyStaticRESTMapper(scheme.Scheme,
scheme.Scheme.PrioritizedVersionsAllGroups()...),
}

View File

@ -99,14 +99,12 @@ func (a *ApplyTask) Start(taskContext *taskrunner.TaskContext) {
obj = info.Object.(*unstructured.Unstructured)
id := object.UnstructuredToObjMetadata(obj)
if err != nil {
err = applyerror.NewUnknownTypeError(err)
if klog.V(4).Enabled() {
klog.Errorf("unable to convert obj to info for %s/%s (%s)--continue",
obj.GetNamespace(), obj.GetName(), err)
// only log event emitted errors if the verbosity > 4
klog.Errorf("apply task errored (object: %s): unable to convert obj to info: %v", id, err)
}
taskContext.SendEvent(a.createApplyFailedEvent(
id,
applyerror.NewUnknownTypeError(err),
))
taskContext.SendEvent(a.createApplyFailedEvent(id, err))
taskContext.InventoryManager().AddFailedApply(id)
continue
}
@ -119,7 +117,8 @@ func (a *ApplyTask) Start(taskContext *taskrunner.TaskContext) {
if filterErr != nil {
var fatalErr *filter.FatalError
if errors.As(filterErr, &fatalErr) {
if klog.V(5).Enabled() {
if klog.V(4).Enabled() {
// only log event emitted errors if the verbosity > 4
klog.Errorf("apply filter errored (filter: %s, object: %s): %v", applyFilter.Name(), id, fatalErr.Err)
}
taskContext.SendEvent(a.createApplyFailedEvent(id, err))
@ -139,8 +138,9 @@ func (a *ApplyTask) Start(taskContext *taskrunner.TaskContext) {
// Execute mutators, if any apply
err = a.mutate(ctx, obj)
if err != nil {
if klog.V(5).Enabled() {
klog.Errorf("error mutating: %v", err)
if klog.V(4).Enabled() {
// only log event emitted errors if the verbosity > 4
klog.Errorf("apply mutation errored (object: %s): %v", id, err)
}
taskContext.SendEvent(a.createApplyFailedEvent(id, err))
taskContext.InventoryManager().AddFailedApply(id)
@ -152,7 +152,7 @@ func (a *ApplyTask) Start(taskContext *taskrunner.TaskContext) {
ao := applyOptionsFactoryFunc(a.Name(), taskContext.EventChannel(),
a.ServerSideOptions, a.DryRunStrategy, a.DynamicClient, a.OpenAPIGetter)
ao.SetObjects([]*resource.Info{info})
klog.V(5).Infof("applying %s/%s...", info.Namespace, info.Name)
klog.V(5).Infof("applying object: %v", id)
err = ao.Run()
if err != nil && a.ServerSideOptions.ServerSideApply && isAPIService(obj) && isStreamError(err) {
// Server-side Apply doesn't work with APIService before k8s 1.21
@ -161,13 +161,12 @@ func (a *ApplyTask) Start(taskContext *taskrunner.TaskContext) {
err = a.clientSideApply(info, taskContext.EventChannel())
}
if err != nil {
err = applyerror.NewApplyRunError(err)
if klog.V(4).Enabled() {
klog.Errorf("error applying (%s/%s) %s", info.Namespace, info.Name, err)
// only log event emitted errors if the verbosity > 4
klog.Errorf("apply errored (object: %s): %v", id, err)
}
taskContext.SendEvent(a.createApplyFailedEvent(
id,
applyerror.NewApplyRunError(err),
))
taskContext.SendEvent(a.createApplyFailedEvent(id, err))
taskContext.InventoryManager().AddFailedApply(id)
} else if info.Object != nil {
acc, err := meta.Accessor(info.Object)