From a2133eb634b964bd44e27e647bd8a8b84b5da643 Mon Sep 17 00:00:00 2001 From: Omer Tuchfeld Date: Tue, 14 Apr 2026 15:59:36 +0200 Subject: [PATCH 1/2] Avoid using `lb-ext.kubeconfig` for seedgen cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Background During cleanup before seed generation, we want to delete all pods in the Succeeded or Failed phase. # Issue The current implementation uses `oc delete pod --all-namespaces --field-selector=status.phase==...` with the node `lb-ext.kubeconfig` as a kubeconfig. This is in contrast to the rest of the controller, which uses the in-cluster config via a client loaded with ctrl.GetConfigOrDie(). We've had users report TLS errors when the controller tries to use the lb-ext.kubeconfig, which seems expected since that kubeconfig is meant for external use and may not have the correct CA or credentials for in-cluster API access. Before they hit that error, the client has already been used, so it proves that the in-cluster config works, and this is specifically an issue with using the lb-ext.kubeconfig for this cleanup step. # Solution The solution is to avoid using the lb-ext.kubeconfig for this cleanup step, and instead use the same in-cluster client that the rest of the controller uses. # Caveat As the existing TODO states: ```go // TODO: Can this be done cleanly via client? The client.DeleteAllOf seems to require a specified namespace, so maybe loop over the namespaces ``` The Kubernetes API does not support DeleteCollection without a namespace for namespaced resources, so we cannot simply use DeleteAllOf with no namespace. Instead, we copy the behavior of kubectl when using `delete --all-namespaces`, which is to list the resources with the field selector, and then delete them one by one. This is less efficient than a single DeleteCollection call, but it's already what kubectl was doing when we were calling it, so we know it works and is acceptable for this use case. # Implementation Details We cannot use `client.MatchingFields` because we do not have an index set up and it seems overkill to do so. # Agent Interaction Agent assisted. The conversation went something like this: - Where does r.Client get its kubeconfig? → ctrl.GetConfigOrDie() → in-cluster SA - Where does the oc call get its kubeconfig? → lb-ext.kubeconfig on the host via chroot executor - Can we generate a kubeconfig from rest.Config? → no built-in utility, hand-rolled versions kept being incomplete - Just use DeleteAllOf with no namespace? → tried it, in a toy project using `sigs.k8s.io/controller-runtime/pkg/envtest`, API rejects it (no cluster-scoped DeleteCollection for namespaced resources) - Inspect kubectl source - what does it do when delete --all-namespaces? → list + delete one by one, sequentially - Did that, modified toy project to prove it works, updated the controller, added comment explaining the reasoning --- controllers/seedgen_controller.go | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/controllers/seedgen_controller.go b/controllers/seedgen_controller.go index 76b5d79143..8023549109 100644 --- a/controllers/seedgen_controller.go +++ b/controllers/seedgen_controller.go @@ -279,6 +279,22 @@ func (r *SeedGeneratorReconciler) cleanupOldRenderedMachineConfigs(ctx context.C return nil } +func (r *SeedGeneratorReconciler) deletePodsWithPhase(ctx context.Context, phase corev1.PodPhase) error { + podList := &corev1.PodList{} + if err := r.Client.List(ctx, podList); err != nil { + return fmt.Errorf("failed to list pods: %w", err) + } + for i := range podList.Items { + if podList.Items[i].Status.Phase != phase { + continue + } + if err := r.Client.Delete(ctx, &podList.Items[i]); err != nil { + return fmt.Errorf("failed to delete pod %s/%s: %w", podList.Items[i].Namespace, podList.Items[i].Name, err) + } + } + return nil +} + // Clean up ACM and other resources on the cluster func (r *SeedGeneratorReconciler) cleanupClusterResources(ctx context.Context) error { // Ensure that the dependent resources are deleted @@ -835,15 +851,13 @@ func (r *SeedGeneratorReconciler) generateSeedImage(ctx context.Context, seedgen return } - // TODO: Can this be done cleanly via client? The client.DeleteAllOf seems to require a specified namespace, so maybe loop over the namespaces r.Log.Info("Cleaning completed and failed pods") - kubeconfigArg := fmt.Sprintf("--kubeconfig=%s", common.KubeconfigFile) - if _, err := r.Executor.Execute("oc", "delete", "pod", kubeconfigArg, "--field-selector=status.phase==Succeeded", "--all-namespaces"); err != nil { + if err := r.deletePodsWithPhase(ctx, corev1.PodSucceeded); err != nil { rc = fmt.Errorf("failed to cleanup Succeeded pods: %w", err) setSeedGenStatusFailed(seedgen, rc.Error()) return } - if _, err := r.Executor.Execute("oc", "delete", "pod", kubeconfigArg, "--field-selector=status.phase==Failed", "--all-namespaces"); err != nil { + if err := r.deletePodsWithPhase(ctx, corev1.PodFailed); err != nil { rc = fmt.Errorf("failed to cleanup Failed pods: %w", err) setSeedGenStatusFailed(seedgen, rc.Error()) return From 9023cdabbb3396913b75e49dc23b6a49e260d84c Mon Sep 17 00:00:00 2001 From: Jun Chen Date: Sat, 18 Apr 2026 09:33:02 -0400 Subject: [PATCH 2/2] Update seedgen_controller.go --- controllers/seedgen_controller.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/controllers/seedgen_controller.go b/controllers/seedgen_controller.go index 8023549109..5047eaba9d 100644 --- a/controllers/seedgen_controller.go +++ b/controllers/seedgen_controller.go @@ -281,14 +281,14 @@ func (r *SeedGeneratorReconciler) cleanupOldRenderedMachineConfigs(ctx context.C func (r *SeedGeneratorReconciler) deletePodsWithPhase(ctx context.Context, phase corev1.PodPhase) error { podList := &corev1.PodList{} - if err := r.Client.List(ctx, podList); err != nil { + if err := r.List(ctx, podList); err != nil { return fmt.Errorf("failed to list pods: %w", err) } for i := range podList.Items { if podList.Items[i].Status.Phase != phase { continue } - if err := r.Client.Delete(ctx, &podList.Items[i]); err != nil { + if err := r.Delete(ctx, &podList.Items[i]); err != nil { return fmt.Errorf("failed to delete pod %s/%s: %w", podList.Items[i].Namespace, podList.Items[i].Name, err) } }