From 78e49b4c6866eb5712a1d30d122c9805167367a9 Mon Sep 17 00:00:00 2001 From: Luca Consalvi Date: Wed, 1 Apr 2026 16:13:11 +0200 Subject: [PATCH 1/2] OCPEDGE-2491: Log pcs status and etcd member list after every recovery test Add post-test logging of `sudo pcs status` and `sudo podman exec etcd etcdctl member list -w table` via SSH through the hypervisor after every recovery test (pass or fail). This provides visibility into the final cluster state without relying on the Kubernetes API, which may be unavailable after recovery tests. The logging is registered via DeferCleanup in BeforeEach and is gated on HasHypervisorConfig(). Errors are logged but never fail the test. Co-Authored-By: Claude Opus 4.6 --- test/extended/two_node/tnf_recovery.go | 73 ++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/test/extended/two_node/tnf_recovery.go b/test/extended/two_node/tnf_recovery.go index b8a65e1ba223..c3dbbbba5db3 100644 --- a/test/extended/two_node/tnf_recovery.go +++ b/test/extended/two_node/tnf_recovery.go @@ -78,6 +78,11 @@ var _ = g.Describe("[sig-etcd][apigroup:config.openshift.io][OCPFeatureGate:Dual peerNode = nodes.Items[randomIndex] // Select the remaining index targetNode = nodes.Items[(randomIndex+1)%len(nodes.Items)] + + // Log final pcs and etcd status after every test (pass or fail) via SSH + g.DeferCleanup(func() { + logFinalClusterStatus([]corev1.Node{peerNode, targetNode}) + }) }) g.It("should recover from graceful node shutdown with etcd member re-addition", func() { @@ -778,6 +783,74 @@ func restartVms(dataPair []vmNodePair, c hypervisorExtendedConfig) { } } +// logFinalClusterStatus logs pcs status and etcd member list via SSH after every test +// (pass or fail). Uses the hypervisor SSH path because the Kubernetes API may not be +// available after a recovery test. Errors are logged but never fail the test. +func logFinalClusterStatus(nodes []corev1.Node) { + if !exutil.HasHypervisorConfig() { + return + } + + sshConfig := exutil.GetHypervisorConfig() + hypervisorConfig := core.SSHConfig{ + IP: sshConfig.HypervisorIP, + User: sshConfig.SSHUser, + PrivateKeyPath: sshConfig.PrivateKeyPath, + } + + if _, err := os.Stat(hypervisorConfig.PrivateKeyPath); os.IsNotExist(err) { + framework.Logf("Skipping final cluster status: private key not found at %s", hypervisorConfig.PrivateKeyPath) + return + } + + knownHostsPath, err := core.PrepareLocalKnownHostsFile(&hypervisorConfig) + if err != nil { + framework.Logf("Skipping final cluster status: failed to prepare known hosts: %v", err) + return + } + + framework.Logf("========== FINAL CLUSTER STATUS ==========") + + for _, node := range nodes { + nodeIP := utils.GetNodeInternalIP(&node) + if nodeIP == "" { + framework.Logf("Skipping node %s: no internal IP", node.Name) + continue + } + + remoteKnownHostsPath, err := core.PrepareRemoteKnownHostsFile(nodeIP, &hypervisorConfig, knownHostsPath) + if err != nil { + framework.Logf("Failed to prepare remote known hosts for node %s: %v", node.Name, err) + continue + } + + // pcs status + pcsOutput, _, pcsErr := services.PcsStatus(nodeIP, &hypervisorConfig, knownHostsPath, remoteKnownHostsPath) + if pcsErr != nil { + framework.Logf("Failed to get pcs status from node %s: %v", node.Name, pcsErr) + } else { + framework.Logf("pcs status from node %s:\n%s", node.Name, pcsOutput) + } + + // etcd member list via SSH (-w table is the etcdctl v3 flag for table output) + etcdOutput, _, etcdErr := core.ExecuteRemoteSSHCommand(nodeIP, + "sudo podman exec etcd etcdctl member list -w table", + &hypervisorConfig, knownHostsPath, remoteKnownHostsPath) + if etcdErr != nil { + framework.Logf("Failed to get etcd member list from node %s: %v", node.Name, etcdErr) + } else { + framework.Logf("etcd member list from node %s:\n%s", node.Name, etcdOutput) + } + + // Only need one successful node for cluster-wide status + if pcsErr == nil && etcdErr == nil { + break + } + } + + framework.Logf("========== END FINAL CLUSTER STATUS ==========") +} + // deferDiagnosticsOnFailure registers a DeferCleanup handler that gathers diagnostic // information when the current test spec fails. This should be called early in test // setup to ensure diagnostics are collected on any failure. From d451f3c33e722c72c341054b998d5324e7c3fee8 Mon Sep 17 00:00:00 2001 From: Luca Consalvi Date: Fri, 3 Apr 2026 15:30:47 +0200 Subject: [PATCH 2/2] OCPEDGE-2491: Capture stderr in post-recovery SSH diagnostics Include stdout and stderr in error log messages for pcs status and etcd member list commands, so diagnostic output is not lost on failure. Co-Authored-By: Claude Opus 4.6 --- test/extended/two_node/tnf_recovery.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/extended/two_node/tnf_recovery.go b/test/extended/two_node/tnf_recovery.go index c3dbbbba5db3..fde52d48291d 100644 --- a/test/extended/two_node/tnf_recovery.go +++ b/test/extended/two_node/tnf_recovery.go @@ -825,19 +825,19 @@ func logFinalClusterStatus(nodes []corev1.Node) { } // pcs status - pcsOutput, _, pcsErr := services.PcsStatus(nodeIP, &hypervisorConfig, knownHostsPath, remoteKnownHostsPath) + pcsOutput, pcsStderr, pcsErr := services.PcsStatus(nodeIP, &hypervisorConfig, knownHostsPath, remoteKnownHostsPath) if pcsErr != nil { - framework.Logf("Failed to get pcs status from node %s: %v", node.Name, pcsErr) + framework.Logf("Failed to get pcs status from node %s: %v\nstdout: %s\nstderr: %s", node.Name, pcsErr, pcsOutput, pcsStderr) } else { framework.Logf("pcs status from node %s:\n%s", node.Name, pcsOutput) } // etcd member list via SSH (-w table is the etcdctl v3 flag for table output) - etcdOutput, _, etcdErr := core.ExecuteRemoteSSHCommand(nodeIP, + etcdOutput, etcdStderr, etcdErr := core.ExecuteRemoteSSHCommand(nodeIP, "sudo podman exec etcd etcdctl member list -w table", &hypervisorConfig, knownHostsPath, remoteKnownHostsPath) if etcdErr != nil { - framework.Logf("Failed to get etcd member list from node %s: %v", node.Name, etcdErr) + framework.Logf("Failed to get etcd member list from node %s: %v\nstdout: %s\nstderr: %s", node.Name, etcdErr, etcdOutput, etcdStderr) } else { framework.Logf("etcd member list from node %s:\n%s", node.Name, etcdOutput) }