From 569d44a10fafbb5e9ad6535a927f8f6699ba54af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86gir=20M=C3=A1ni=20Hauksson?= Date: Sun, 22 Mar 2026 23:50:35 +0000 Subject: [PATCH 1/3] short circuit suspension if deleting resource and it is not found --- docs/component.md | 4 ++++ pkg/component/suspend.go | 33 ++++++++++++++++++++++++++++++--- pkg/component/suspend_test.go | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 3 deletions(-) diff --git a/docs/component.md b/docs/component.md index a6528235..9e0e9248 100644 --- a/docs/component.md +++ b/docs/component.md @@ -159,6 +159,10 @@ Suspension allows a component to be intentionally deactivated without deleting i 3. The component polls `SuspensionStatus()` on each resource. 4. Once all resources report `Suspended`, the condition transitions to `Suspended`. +Resources that do not yet exist in the cluster are created in their suspended state (with suspension mutations already applied). For example, a Deployment is created with zero replicas. This ensures the resource is immediately available when suspension ends. + +Resources with `DeleteOnSuspend` enabled (e.g., DaemonSets) are **not** created if they are already absent — their absence is treated as already suspended. This avoids a create→delete churn loop on every reconcile while the component remains suspended. + Resources that are not `Suspendable` are left in place. ## ReconcileContext diff --git a/pkg/component/suspend.go b/pkg/component/suspend.go index 957fae0c..3996b934 100644 --- a/pkg/component/suspend.go +++ b/pkg/component/suspend.go @@ -9,6 +9,7 @@ import ( "github.com/sourcehawk/operator-component-framework/pkg/component/concepts" v1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + "sigs.k8s.io/controller-runtime/pkg/client" ) type suspensionResults []concepts.SuspensionStatusWithReason @@ -72,14 +73,22 @@ func suspendResources( return results, nil } -// suspendResource handles the two-stage suspension process for a single resource: +// suspendResource handles the suspension process for a single resource: // -// Stage 1: Mutation +// Stage 1: Short-circuit for absent delete-on-suspend resources +// - If DeleteOnSuspend() is true and the object does not exist on the cluster, +// returns SuspensionStatusSuspended immediately without creating the resource. +// This prevents a create→delete churn loop on every reconcile while suspended. +// +// Stage 2: Mutation // - Applies suspension-specific mutations to the resource's desired state via Suspend(). // - Persists these mutations to the cluster using createOrUpdateResources. +// If the resource does not yet exist, it is created with suspension mutations already applied +// (e.g., a Deployment is created with zero replicas). This is intentional: the resource is +// immediately available in its suspended state, ready for when suspension ends. // - Checks if the resource has reached the Suspended state via SuspensionStatus(). // -// Stage 2: Optional Deletion +// Stage 3: Optional Deletion // - If DeleteOnSuspend() is true AND the resource has reached SuspensionStatusSuspended, // the Kubernetes object is deleted from the cluster. // - Deletion is deferred until the Suspended state is reached to allow for graceful @@ -98,6 +107,24 @@ func suspendResource( return concepts.SuspensionStatusWithReason{}, fmt.Errorf("failed to get object on suspension: %w", err) } + // Short-circuit: if the resource should be deleted on suspend and already doesn't exist, + // skip CreateOrUpdate to avoid a create->delete churn loop on every reconcile. + if suspendable.DeleteOnSuspend() { + existing := object.DeepCopyObject().(client.Object) + err := rec.Client.Get(ctx, client.ObjectKeyFromObject(object), existing) + if apierrors.IsNotFound(err) { + return concepts.SuspensionStatusWithReason{ + Status: concepts.SuspensionStatusSuspended, + Reason: fmt.Sprintf("Resource %s already deleted.", resource.Identity()), + }, nil + } + if err != nil { + return concepts.SuspensionStatusWithReason{}, fmt.Errorf( + "failed to check existence of resource %s on suspension: %w", resource.Identity(), err, + ) + } + } + // Apply suspension mutation (if any) _, err = createOrUpdateResources(ctx, rec, []Resource{resource}) if err != nil { diff --git a/pkg/component/suspend_test.go b/pkg/component/suspend_test.go index 4b03f841..3c402860 100644 --- a/pkg/component/suspend_test.go +++ b/pkg/component/suspend_test.go @@ -342,4 +342,36 @@ func TestSuspendResource(t *testing.T) { assert.Equal(t, concepts.SuspensionStatusSuspended, status.Status) cli.AssertCalled(t, "Delete", ctx, obj, mock.Anything) }) + + t.Run("should skip CreateOrUpdate when DeleteOnSuspend is true and object does not exist", func(t *testing.T) { + cli := &MockClient{} + rec := setupReconcileContext(scheme, setupTestOwner(), cli) + res, _ := setupMockResource("cm", concepts.SuspensionStatusSuspended, "Done", true) + + cli.On("Get", mock.Anything, mock.Anything, mock.Anything, mock.Anything). + Return(apierrors.NewNotFound(schema.GroupResource{Group: "", Resource: "configmaps"}, "cm")) + + status, err := suspendResource(ctx, rec, res, res) + require.NoError(t, err) + assert.Equal(t, concepts.SuspensionStatusSuspended, status.Status) + assert.Contains(t, status.Reason, "already deleted") + + // Verify CreateOrUpdate was never called (no Update or Create calls) + cli.AssertNotCalled(t, "Update", mock.Anything, mock.Anything, mock.Anything) + cli.AssertNotCalled(t, "Create", mock.Anything, mock.Anything, mock.Anything) + cli.AssertNotCalled(t, "Delete", mock.Anything, mock.Anything, mock.Anything) + }) + + t.Run("should return error if existence check fails for DeleteOnSuspend resource", func(t *testing.T) { + cli := &MockClient{} + rec := setupReconcileContext(scheme, setupTestOwner(), cli) + res, _ := setupMockResource("cm", concepts.SuspensionStatusSuspended, "Done", true) + + cli.On("Get", mock.Anything, mock.Anything, mock.Anything, mock.Anything). + Return(errors.New("network error")) + + _, err := suspendResource(ctx, rec, res, res) + require.Error(t, err) + assert.Contains(t, err.Error(), "network error") + }) } From 68e7344467e31dc26028c18baffbca773bc94426 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86gir=20M=C3=A1ni?= Date: Sun, 22 Mar 2026 23:51:29 +0000 Subject: [PATCH 2/3] Apply suggestion from @sourcehawk --- docs/component.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/component.md b/docs/component.md index 9e0e9248..d99d92cd 100644 --- a/docs/component.md +++ b/docs/component.md @@ -161,7 +161,7 @@ Suspension allows a component to be intentionally deactivated without deleting i Resources that do not yet exist in the cluster are created in their suspended state (with suspension mutations already applied). For example, a Deployment is created with zero replicas. This ensures the resource is immediately available when suspension ends. -Resources with `DeleteOnSuspend` enabled (e.g., DaemonSets) are **not** created if they are already absent — their absence is treated as already suspended. This avoids a create→delete churn loop on every reconcile while the component remains suspended. +Resources with `DeleteOnSuspend` enabled are **not** created if they are already absent — their absence is treated as already suspended. This avoids a create→delete churn loop on every reconcile while the component remains suspended. Resources that are not `Suspendable` are left in place. From 0cc82518f63b3286730cf82cc8f17c211067302e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86gir=20M=C3=A1ni=20Hauksson?= Date: Mon, 23 Mar 2026 00:07:41 +0000 Subject: [PATCH 3/3] address comments --- pkg/component/component_test.go | 4 ++++ pkg/component/suspend.go | 22 +++++++++++++++------- pkg/component/suspend_test.go | 12 +++++++++++- 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/pkg/component/component_test.go b/pkg/component/component_test.go index d7309229..03b856af 100644 --- a/pkg/component/component_test.go +++ b/pkg/component/component_test.go @@ -452,6 +452,10 @@ var _ = Describe("Component Reconciler", func() { // Given comp.suspended = true res := &MockSuspendableResource{} + res.On("Object").Return(&corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: "failing-suspend-resource", Namespace: namespace}, + }, nil) + res.On("DeleteOnSuspend").Return(false) res.On("Suspend").Return(fmt.Errorf("suspend error")) res.On("Identity").Return("failing-suspend-resource") diff --git a/pkg/component/suspend.go b/pkg/component/suspend.go index 3996b934..b718a62a 100644 --- a/pkg/component/suspend.go +++ b/pkg/component/suspend.go @@ -79,9 +79,11 @@ func suspendResources( // - If DeleteOnSuspend() is true and the object does not exist on the cluster, // returns SuspensionStatusSuspended immediately without creating the resource. // This prevents a create→delete churn loop on every reconcile while suspended. +// The check runs before Suspend() to avoid queuing a mutation that will never be applied. // // Stage 2: Mutation -// - Applies suspension-specific mutations to the resource's desired state via Suspend(). +// - Registers suspension-specific mutations via Suspend(), then applies them to the +// resource's desired state. // - Persists these mutations to the cluster using createOrUpdateResources. // If the resource does not yet exist, it is created with suspension mutations already applied // (e.g., a Deployment is created with zero replicas). This is intentional: the resource is @@ -96,11 +98,6 @@ func suspendResources( func suspendResource( ctx context.Context, rec ReconcileContext, resource Resource, suspendable concepts.Suspendable, ) (concepts.SuspensionStatusWithReason, error) { - // Create suspension mutation on resource (if any) - if err := suspendable.Suspend(); err != nil { - return concepts.SuspensionStatusWithReason{}, fmt.Errorf("failed to suspend resource: %w", err) - } - // Get the object if possible object, err := resource.Object() if err != nil { @@ -109,8 +106,14 @@ func suspendResource( // Short-circuit: if the resource should be deleted on suspend and already doesn't exist, // skip CreateOrUpdate to avoid a create->delete churn loop on every reconcile. + // This check runs before Suspend() to avoid queuing a mutation that will never be applied. if suspendable.DeleteOnSuspend() { - existing := object.DeepCopyObject().(client.Object) + existing, ok := object.DeepCopyObject().(client.Object) + if !ok { + return concepts.SuspensionStatusWithReason{}, fmt.Errorf( + "failed to deep copy object of type %T", object, + ) + } err := rec.Client.Get(ctx, client.ObjectKeyFromObject(object), existing) if apierrors.IsNotFound(err) { return concepts.SuspensionStatusWithReason{ @@ -125,6 +128,11 @@ func suspendResource( } } + // Create suspension mutation on resource (if any) + if err := suspendable.Suspend(); err != nil { + return concepts.SuspensionStatusWithReason{}, fmt.Errorf("failed to suspend resource: %w", err) + } + // Apply suspension mutation (if any) _, err = createOrUpdateResources(ctx, rec, []Resource{resource}) if err != nil { diff --git a/pkg/component/suspend_test.go b/pkg/component/suspend_test.go index 3c402860..01a74be2 100644 --- a/pkg/component/suspend_test.go +++ b/pkg/component/suspend_test.go @@ -157,9 +157,15 @@ func TestSuspendResources(t *testing.T) { t.Run("should return joined errors if any suspension fails", func(t *testing.T) { rec := setupReconcileContext(scheme, nil, &MockClient{}) + obj1 := &v1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "r1", Namespace: "default"}} + obj2 := &v1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "r2", Namespace: "default"}} r1 := &MockSuspendableResource{} + r1.On("Object").Return(obj1, nil) + r1.On("DeleteOnSuspend").Return(false) r1.On("Suspend").Return(errors.New("fail1")) r2 := &MockSuspendableResource{} + r2.On("Object").Return(obj2, nil) + r2.On("DeleteOnSuspend").Return(false) r2.On("Suspend").Return(errors.New("fail2")) resources := []Resource{r1, r2} @@ -267,6 +273,9 @@ func TestSuspendResource(t *testing.T) { cli := &MockClient{} rec := setupReconcileContext(scheme, nil, cli) res := &MockSuspendableResource{} + obj := &v1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "cm", Namespace: "default"}} + res.On("Object").Return(obj, nil) + res.On("DeleteOnSuspend").Return(false) res.On("Suspend").Return(errors.New("suspend fail")) _, err := suspendResource(ctx, rec, res, res) @@ -278,7 +287,6 @@ func TestSuspendResource(t *testing.T) { cli := &MockClient{} rec := setupReconcileContext(scheme, nil, cli) res := &MockSuspendableResource{} - res.On("Suspend").Return(nil) res.On("Object").Return(nil, errors.New("object fail")) _, err := suspendResource(ctx, rec, res, res) @@ -356,6 +364,8 @@ func TestSuspendResource(t *testing.T) { assert.Equal(t, concepts.SuspensionStatusSuspended, status.Status) assert.Contains(t, status.Reason, "already deleted") + // Verify Suspend() was not called — no mutation should be queued when short-circuiting + res.AssertNotCalled(t, "Suspend") // Verify CreateOrUpdate was never called (no Update or Create calls) cli.AssertNotCalled(t, "Update", mock.Anything, mock.Anything, mock.Anything) cli.AssertNotCalled(t, "Create", mock.Anything, mock.Anything, mock.Anything)