Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 156 additions & 2 deletions test/extended/node/node_e2e/node.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,22 @@
package node

import (
	"context"
	"fmt"
	"regexp"
	"strings"
	"time"

	g "github.com/onsi/ginkgo/v2"
	o "github.com/onsi/gomega"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	e2e "k8s.io/kubernetes/test/e2e/framework"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"

	nodeutils "github.com/openshift/origin/test/extended/node"
	exutil "github.com/openshift/origin/test/extended/util"
)

var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager", func() {
Expand Down Expand Up @@ -104,3 +111,150 @@ var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] Kubelet, CRI-O, CPU manager",
o.Expect(output).To(o.ContainSubstring("spec.cgroupMode: Unsupported value: \"v1\": supported values: \"v2\", \"\""))
})
})

var _ = g.Describe("[sig-node] [Jira:Node/Kubelet] NODE initContainer policy,volume,readines,quota", func() {
defer g.GinkgoRecover()

var (
oc = exutil.NewCLI("node-initcontainer")
)

// Skip all tests on MicroShift clusters as MachineConfig resources are not available
g.BeforeEach(func() {
isMicroShift, err := exutil.IsMicroShiftCluster(oc.AdminKubeClient())
o.Expect(err).NotTo(o.HaveOccurred())
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you change this expect() to just log err if it exists?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just realized that if the check does fail, ginkgo wont stop. Maybe it needs an explicit Fail(). The goal is to make setup failures clearly different from test failures.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A loop around the check might make it more robust. Openshift should eventually respond.

if isMicroShift {
g.Skip("Skipping test on MicroShift cluster - MachineConfig resources are not available")
}
})

//author: bgudi@redhat.com
g.It("[OTP] Init containers should not restart when the exited init container is removed from node [OCP-38271]", func() {
g.By("Test for case OCP-38271")
oc.SetupProject()

podName := "initcon-pod"
namespace := oc.Namespace()
ctx := context.Background()

g.By("Create a pod with init container")
pod := &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: podName,
Namespace: namespace,
},
Spec: corev1.PodSpec{
InitContainers: []corev1.Container{
{
Name: "inittest",
Image: "quay.io/openshifttest/busybox@sha256:c5439d7db88ab5423999530349d327b04279ad3161d7596d2126dfb5b02bfd1f",
Command: []string{"bin/sh", "-ec", "echo running >> /mnt/data/test"},
VolumeMounts: []corev1.VolumeMount{
{
Name: "data",
MountPath: "/mnt/data",
},
},
},
},
Containers: []corev1.Container{
{
Name: "hello-test",
Image: "quay.io/openshifttest/busybox@sha256:c5439d7db88ab5423999530349d327b04279ad3161d7596d2126dfb5b02bfd1f",
Command: []string{"bin/sh", "-c", "sleep 3600"},
VolumeMounts: []corev1.VolumeMount{
{
Name: "data",
MountPath: "/mnt/data",
},
},
},
},
Volumes: []corev1.Volume{
{
Name: "data",
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{},
},
},
},
RestartPolicy: corev1.RestartPolicyNever,
},
}

_, err := oc.KubeClient().CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{})
o.Expect(err).NotTo(o.HaveOccurred())
defer func() {
oc.KubeClient().CoreV1().Pods(namespace).Delete(ctx, podName, metav1.DeleteOptions{})
}()

g.By("Check pod status")
err = e2epod.WaitForPodRunningInNamespace(ctx, oc.KubeClient(), pod)
o.Expect(err).NotTo(o.HaveOccurred(), "pod is not running")

g.By("Check init container exit normally")
err = wait.Poll(5*time.Second, 1*time.Minute, func() (bool, error) {
pod, err := oc.KubeClient().CoreV1().Pods(namespace).Get(ctx, podName, metav1.GetOptions{})
if err != nil {
return false, err
}
for _, status := range pod.Status.InitContainerStatuses {
if status.Name == "inittest" {
if status.State.Terminated != nil && status.State.Terminated.ExitCode == 0 {
e2e.Logf("Init container exited with code 0")
return true, nil
}
}
}
return false, nil
})
o.Expect(err).NotTo(o.HaveOccurred(), "container not exit normally")

g.By("Get node where pod is running")
pod, err = oc.KubeClient().CoreV1().Pods(namespace).Get(ctx, podName, metav1.GetOptions{})
o.Expect(err).NotTo(o.HaveOccurred())
nodeName := pod.Spec.NodeName
o.Expect(nodeName).NotTo(o.BeEmpty(), "pod node name is empty")

g.By("Get init container ID from pod status")
var containerID string
for _, status := range pod.Status.InitContainerStatuses {
if status.Name == "inittest" {
containerID = status.ContainerID
break
}
}
o.Expect(containerID).NotTo(o.BeEmpty(), "init container ID is empty")

// Extract the actual container ID (remove prefix like "cri-o://")
containerIDPattern := regexp.MustCompile(`^[^/]+://(.+)$`)
matches := containerIDPattern.FindStringSubmatch(containerID)
o.Expect(matches).To(o.HaveLen(2), "failed to parse container ID")
actualContainerID := matches[1]

g.By("Delete init container from node")
deleteCmd := fmt.Sprintf("crictl rm %s", actualContainerID)
output, err := nodeutils.ExecOnNodeWithChroot(oc, nodeName, "/bin/bash", "-c", deleteCmd)
o.Expect(err).NotTo(o.HaveOccurred(), "fail to delete container")
e2e.Logf("Container deletion output: %s", output)

g.By("Check init container not restart again")
err = wait.Poll(5*time.Second, 1*time.Minute, func() (bool, error) {
pod, err := oc.KubeClient().CoreV1().Pods(namespace).Get(ctx, podName, metav1.GetOptions{})
if err != nil {
return false, err
}
for _, status := range pod.Status.InitContainerStatuses {
if status.Name == "inittest" {
if status.RestartCount > 0 {
e2e.Logf("Init container restarted, restart count: %d", status.RestartCount)
return false, fmt.Errorf("init container restarted")
}
}
}
e2e.Logf("Init container has not restarted")
return true, nil
})
o.Expect(err).NotTo(o.HaveOccurred(), "init container restart")
Comment on lines +241 to +258
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Test logic doesn't properly verify that the container does not restart.

The current polling logic returns success immediately on the first check where RestartCount == 0. Since this check runs right after the crictl rm, the kubelet may not have had time to detect the container removal. The test could pass even if the container would restart a moment later.

To properly verify the container does NOT restart, observe for a minimum duration while checking that RestartCount remains 0 throughout:

🐛 Suggested fix: observe for full duration
 		g.By("Check init container not restart again")
-		err = wait.Poll(5*time.Second, 1*time.Minute, func() (bool, error) {
+		// Observe for 30 seconds to ensure no restart occurs
+		observationDuration := 30 * time.Second
+		pollInterval := 5 * time.Second
+		startTime := time.Now()
+		err = wait.Poll(pollInterval, observationDuration+pollInterval, func() (bool, error) {
 			pod, err := oc.KubeClient().CoreV1().Pods(namespace).Get(ctx, podName, metav1.GetOptions{})
 			if err != nil {
 				return false, err
 			}
 			for _, status := range pod.Status.InitContainerStatuses {
 				if status.Name == "inittest" {
 					if status.RestartCount > 0 {
 						e2e.Logf("Init container restarted, restart count: %d", status.RestartCount)
 						return false, fmt.Errorf("init container restarted")
 					}
 				}
 			}
 			e2e.Logf("Init container has not restarted")
-			return true, nil
+			// Continue observing until the duration has elapsed
+			if time.Since(startTime) >= observationDuration {
+				return true, nil
+			}
+			return false, nil
 		})
 		o.Expect(err).NotTo(o.HaveOccurred(), "init container restart")
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
g.By("Check init container not restart again")
err = wait.Poll(5*time.Second, 1*time.Minute, func() (bool, error) {
pod, err := oc.KubeClient().CoreV1().Pods(namespace).Get(ctx, podName, metav1.GetOptions{})
if err != nil {
return false, err
}
for _, status := range pod.Status.InitContainerStatuses {
if status.Name == "inittest" {
if status.RestartCount > 0 {
e2e.Logf("Init container restarted, restart count: %d", status.RestartCount)
return false, fmt.Errorf("init container restarted")
}
}
}
e2e.Logf("Init container has not restarted")
return true, nil
})
o.Expect(err).NotTo(o.HaveOccurred(), "init container restart")
g.By("Check init container not restart again")
// Observe for 30 seconds to ensure no restart occurs
observationDuration := 30 * time.Second
pollInterval := 5 * time.Second
startTime := time.Now()
err = wait.Poll(pollInterval, observationDuration+pollInterval, func() (bool, error) {
pod, err := oc.KubeClient().CoreV1().Pods(namespace).Get(ctx, podName, metav1.GetOptions{})
if err != nil {
return false, err
}
for _, status := range pod.Status.InitContainerStatuses {
if status.Name == "inittest" {
if status.RestartCount > 0 {
e2e.Logf("Init container restarted, restart count: %d", status.RestartCount)
return false, fmt.Errorf("init container restarted")
}
}
}
e2e.Logf("Init container has not restarted")
// Continue observing until the duration has elapsed
if time.Since(startTime) >= observationDuration {
return true, nil
}
return false, nil
})
o.Expect(err).NotTo(o.HaveOccurred(), "init container restart")
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@test/extended/node/node_e2e/node.go` around lines 241 - 258, The test
currently returns success as soon as RestartCount==0, which allows a race;
change the wait.Poll predicate so it does NOT return success when
RestartCount==0 but instead keeps polling for the full duration: inside the func
passed to wait.Poll iterate pod.Status.InitContainerStatuses for the "inittest"
entry and if RestartCount>0 immediately return true, fmt.Errorf("init container
restarted"); otherwise return false, nil so polling continues; after wait.Poll,
assert that the error is a timeout (wait.ErrWaitTimeout) and fail if err is nil
or a different error (i.e. a restart was detected) — update the expectation that
currently uses o.Expect(err).NotTo(o.HaveOccurred()) accordingly.

})
})