diff --git a/test/extended/machine_config/pinnedimages.go b/test/extended/machine_config/pinnedimages.go index dc2a53054864..945d6bb8f5cc 100644 --- a/test/extended/machine_config/pinnedimages.go +++ b/test/extended/machine_config/pinnedimages.go @@ -94,8 +94,6 @@ var _ = g.Describe("[Suite:openshift/machine-config-operator/disruptive][sig-mco // Add node to custom MCP & wait for the node to be ready in the MCP optedNodes, err := addWorkerNodesToCustomPool(oc, kubeClient, 1, "custom") o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error adding node to `custom` MCP: %v", err)) - defer waitTillNodeReadyWithConfig(kubeClient, optedNodes[0], workerConfigPrefix) - defer unlabelNode(oc, optedNodes[0]) framework.Logf("Waiting for `%v` node to be ready in `custom` MCP.", optedNodes[0]) waitTillNodeReadyWithConfig(kubeClient, optedNodes[0], customConfigPrefix) @@ -129,12 +127,28 @@ var _ = g.Describe("[Suite:openshift/machine-config-operator/disruptive][sig-mco // Apply PIS defer deletePinnedImages(oc, kubeClient, clientSet, optedNodes, pinnedImages, isMetalDisconnected) - defer deletePIS(oc, pis.Name) err = applyPIS(oc, pisFixture, pis, pisDiverged) o.Expect(err).NotTo(o.HaveOccurred(), "Applied PIS") // Test the images applied in the PIS exist on the node after garbage collection. GCPISTest(oc, kubeClient, clientSet, true, optedNodes[0], kcFixture, gcImage, pis.Name, isMetalDisconnected) + + // Cleanup: Transition node back to worker pool BEFORE deleting configs to avoid race condition. + // If we delete the configs while the node is rebooting to transition back to worker pool, + // the node may come up with a deleted rendered-custom config and get stuck in degraded state.
+ framework.Logf("Cleaning up: Unlabeling node '%s' to move back to worker pool", optedNodes[0]) + err = unlabelNode(oc, optedNodes[0]) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error unlabeling node: %v", err)) + + framework.Logf("Waiting for node '%s' to transition to worker config", optedNodes[0]) + waitTillNodeReadyWithConfig(kubeClient, optedNodes[0], workerConfigPrefix) + + // Now it's safe to delete the custom pool configs + framework.Logf("Node transitioned successfully, deleting KubeletConfig and PinnedImageSet") + err = deleteKC(oc, "custom-gc-config") + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error deleting KubeletConfig: %v", err)) + err = deletePIS(oc, pis.Name) + o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error deleting PinnedImageSet: %v", err)) }) g.It("All Nodes in a Custom Pool should have the PinnedImages in PIS [apigroup:machineconfiguration.openshift.io]", func() { @@ -384,7 +398,6 @@ func addWorkerNodesToCustomPool(oc *exutil.CLI, kubeClient *kubernetes.Clientset // `GCPISTest` completes the body of a PIS test including the garbage collection step func GCPISTest(oc *exutil.CLI, kubeClient *kubernetes.Clientset, clientSet *mcClient.Clientset, success bool, nodeName, customGcKCFixture, gcImage, pisName string, isMetalDisconnected bool) { // Apply KC to Pool - defer deleteKC(oc, "custom-gc-config") err := oc.Run("apply").Args("-f", customGcKCFixture).Execute() o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Error applying garbage collection kubelet config: %s", err))