From 3ff99d4d0eac29505e4f3f53eadce7d3ebd4958c Mon Sep 17 00:00:00 2001 From: Dan Manor Date: Tue, 21 Apr 2026 11:56:25 +0300 Subject: [PATCH] OCPBUGS-77924: cleanup orphaned boot entries in IPC --- controllers/ipc_idle_handlers.go | 62 ++++++++++++++- controllers/ipc_idle_handlers_test.go | 107 ++++++++++++++++++++++++-- 2 files changed, 160 insertions(+), 9 deletions(-) diff --git a/controllers/ipc_idle_handlers.go b/controllers/ipc_idle_handlers.go index 25dfc5e2b0..593673d395 100644 --- a/controllers/ipc_idle_handlers.go +++ b/controllers/ipc_idle_handlers.go @@ -151,10 +151,18 @@ func (h *IPCIdleStageHandler) cleanup(logger logr.Logger) error { return fmt.Errorf("failed to remount sysroot: %w", err) } + if err := h.ChrootOps.RemountBoot(); err != nil { + return fmt.Errorf("failed to remount boot: %w", err) + } + if err := h.cleanuoUnbootedStateroots(logger); err != nil { return fmt.Errorf("failed to clean up unbooted stateroots: %w", err) } + if err := removeOrphanedBootEntries(logger, h.ChrootOps, h.RPMOstreeClient); err != nil { + return err + } + if err := cleanupLCAWorkspace(h.ChrootOps); err != nil { return fmt.Errorf("failed to cleanup workspace: %w", err) } @@ -199,10 +207,6 @@ func (h *IPCIdleStageHandler) cleanuoUnbootedStateroots(logger logr.Logger) erro } logger.Info("Stateroots to remove", "stateroots", staterootsToRemove) - if err := h.ChrootOps.RemountBoot(); err != nil { - return fmt.Errorf("failed to remount boot: %w", err) - } - if err := removeBootDirsByStaterootPrefixes(logger, h.ChrootOps, staterootsToRemove); err != nil { return err } @@ -250,6 +254,56 @@ func removeBootDirsByStaterootPrefixes( return nil } +// removeOrphanedBootEntries removes directories under /boot/ostree that do not +// match any stateroot currently listed in rpm-ostree deployments. +func removeOrphanedBootEntries( + logger logr.Logger, + chrootOps ops.Ops, + rpmOstreeClient rpmostreeclient.IClient, +) error { + status, err := rpmOstreeClient.QueryStatus() + if err != nil { + return fmt.Errorf("failed to query rpm-ostree status: %w", err) + } + + deployedStateroots := make(map[string]struct{}) + for i := range status.Deployments { + deployedStateroots[status.Deployments[i].OSName] = struct{}{} + } + + bootOstreePath := common.PathOutsideChroot("/boot/ostree") + entries, err := chrootOps.ReadDir(bootOstreePath) + if err != nil { + if chrootOps.IsNotExist(err) { + return nil + } + return fmt.Errorf("failed to list boot ostree directory %s: %w", bootOstreePath, err) + } + + for _, e := range entries { + if !e.IsDir() { + continue + } + name := e.Name() + matched := false + for stateroot := range deployedStateroots { + if strings.HasPrefix(name, stateroot+"-") { + matched = true + break + } + } + if !matched { + dirPath := filepath.Join(bootOstreePath, name) + logger.Info("Removing orphaned boot entry with no matching deployment", "path", dirPath) + if err := chrootOps.RemoveAllFiles(dirPath); err != nil { + return fmt.Errorf("failed to remove orphaned boot directory %s: %w", dirPath, err) + } + } + } + + return nil +} + func getStaterootsToRemove(rpmOstreeClient rpmostreeclient.IClient) ([]string, error) { status, err := rpmOstreeClient.QueryStatus() if err != nil { diff --git a/controllers/ipc_idle_handlers_test.go b/controllers/ipc_idle_handlers_test.go index 657c20ff9a..e93fda5a7b 100644 --- a/controllers/ipc_idle_handlers_test.go +++ b/controllers/ipc_idle_handlers_test.go @@ -399,6 +399,7 @@ func TestIPCIdleStageHandler_Handle(t *testing.T) { CheckHealth = func(ctx context.Context, c client.Reader, l logr.Logger) error { return nil } mockOps.EXPECT().RemountSysroot().Return(nil).Times(1) + mockOps.EXPECT().RemountBoot().Return(nil).Times(1) mockRpm.EXPECT().QueryStatus().Return(nil, errors.New("rpm error")).Times(1) res, err := h.Handle(ctx, ipc) @@ -436,15 +437,20 @@ func TestIPCIdleStageHandler_Handle(t *testing.T) { defer func() { CheckHealth = oldHC }() CheckHealth = func(ctx context.Context, c client.Reader, l logr.Logger) error { return nil } + oldOsReadDir := osReadDir + defer func() { osReadDir = oldOsReadDir }() + osReadDir = func(name string) ([]os.DirEntry, error) { return []os.DirEntry{}, nil } + status := &rpmostreeclient.Status{ Deployments: []rpmostreeclient.Deployment{{OSName: "rhcos", Booted: true}}, } mockOps.EXPECT().RemountSysroot().Return(nil).Times(1) - // getStaterootsToRemove + CleanupUnbootedStateroots - mockRpm.EXPECT().QueryStatus().Return(status, nil).Times(2) + // getStaterootsToRemove + removeOrphanedBootEntries + CleanupUnbootedStateroots + mockRpm.EXPECT().QueryStatus().Return(status, nil).Times(3) mockOps.EXPECT().RemountBoot().Return(nil).Times(1) - mockOps.EXPECT().ReadDir(gomock.Any()).Return([]os.DirEntry{}, nil).Times(1) + // removeBootDirsByStaterootPrefixes + removeOrphanedBootEntries + mockOps.EXPECT().ReadDir(gomock.Any()).Return([]os.DirEntry{}, nil).Times(2) mockRpm.EXPECT().RpmOstreeCleanup().Return(nil).Times(1) mockOps.EXPECT().StatFile(gomock.Any()).Return(fakeFileInfo{}, nil).Times(1) mockOps.EXPECT().RemoveAllFiles(gomock.Any()).Return(errors.New("rm failed")).Times(1) @@ -501,14 +507,20 @@ func TestIPCIdleStageHandler_Handle(t *testing.T) { defer func() { CheckHealth = oldHC }() CheckHealth = func(ctx context.Context, c client.Reader, l logr.Logger) error { return nil } + oldOsReadDir := osReadDir + defer func() { osReadDir = oldOsReadDir }() + osReadDir = func(name string) ([]os.DirEntry, error) { return []os.DirEntry{}, nil } + status := &rpmostreeclient.Status{ Deployments: []rpmostreeclient.Deployment{{OSName: "rhcos", Booted: true}}, } mockOps.EXPECT().RemountSysroot().Return(nil).Times(1) - mockRpm.EXPECT().QueryStatus().Return(status, nil).Times(2) // getStaterootsToRemove + CleanupUnbootedStateroots + // getStaterootsToRemove + removeOrphanedBootEntries + CleanupUnbootedStateroots + mockRpm.EXPECT().QueryStatus().Return(status, nil).Times(3) mockOps.EXPECT().RemountBoot().Return(nil).Times(1) - mockOps.EXPECT().ReadDir(gomock.Any()).Return([]os.DirEntry{}, nil).Times(1) + // removeBootDirsByStaterootPrefixes + removeOrphanedBootEntries + mockOps.EXPECT().ReadDir(gomock.Any()).Return([]os.DirEntry{}, nil).Times(2) mockRpm.EXPECT().RpmOstreeCleanup().Return(nil).Times(1) mockOps.EXPECT().StatFile(gomock.Any()).Return(nil, errors.New("not exist")).Times(1) mockOps.EXPECT().RemoveAllFiles(gomock.Any()).Times(0) @@ -791,3 +803,88 @@ func TestRemoveBootDirsByStaterootPrefixes_NoBootDir(t *testing.T) { err := removeBootDirsByStaterootPrefixes(logger, mockOps, []string{"rhcos"}) assert.NoError(t, err) } + +func TestRemoveOrphanedBootEntries(t *testing.T) { + gc := gomock.NewController(t) + defer gc.Finish() + + mockOps := ops.NewMockOps(gc) + mockRpm := rpmostreeclient.NewMockIClient(gc) + logger := logr.Logger{} + + bootOstreePath := common.PathOutsideChroot("/boot/ostree") + + status := &rpmostreeclient.Status{ + Deployments: []rpmostreeclient.Deployment{ + {OSName: "rhcos_4.22.0_ec.5", Booted: true}, + }, + } + mockRpm.EXPECT().QueryStatus().Return(status, nil).Times(1) + + entries := []os.DirEntry{ + idleCleanupTestDirEntry{name: "rhcos-d1aaeead2ad0571c", isDir: true}, // orphaned, no matching deployment + idleCleanupTestDirEntry{name: "rhcos_192-168-128-80-d1aaeead2ad0571c", isDir: true}, // orphaned, no matching deployment + idleCleanupTestDirEntry{name: "rhcos_4.22.0_ec.5-d1aaeead2ad0571c", isDir: true}, // matches booted stateroot, keep + idleCleanupTestDirEntry{name: "somefile", isDir: false}, // not a dir, ignore + } + mockOps.EXPECT().ReadDir(bootOstreePath).Return(entries, nil).Times(1) + + mockOps.EXPECT().RemoveAllFiles(filepath.Join(bootOstreePath, "rhcos-d1aaeead2ad0571c")).Return(nil).Times(1) + mockOps.EXPECT().RemoveAllFiles(filepath.Join(bootOstreePath, "rhcos_192-168-128-80-d1aaeead2ad0571c")).Return(nil).Times(1) + + err := removeOrphanedBootEntries(logger, mockOps, mockRpm) + assert.NoError(t, err) +} + +func TestRemoveOrphanedBootEntries_NoBootDir(t *testing.T) { + gc := gomock.NewController(t) + defer gc.Finish() + + mockOps := ops.NewMockOps(gc) + mockRpm := rpmostreeclient.NewMockIClient(gc) + logger := logr.Logger{} + + status := &rpmostreeclient.Status{ + Deployments: []rpmostreeclient.Deployment{ + {OSName: "rhcos", Booted: true}, + }, + } + mockRpm.EXPECT().QueryStatus().Return(status, nil).Times(1) + + bootOstreePath := common.PathOutsideChroot("/boot/ostree") + mockOps.EXPECT().ReadDir(bootOstreePath).Return(nil, os.ErrNotExist).Times(1) + mockOps.EXPECT().IsNotExist(os.ErrNotExist).Return(true).Times(1) + + err := removeOrphanedBootEntries(logger, mockOps, mockRpm) + assert.NoError(t, err) +} + +func TestRemoveOrphanedBootEntries_MultipleDeployments(t *testing.T) { + gc := gomock.NewController(t) + defer gc.Finish() + + mockOps := ops.NewMockOps(gc) + mockRpm := rpmostreeclient.NewMockIClient(gc) + logger := logr.Logger{} + + bootOstreePath := common.PathOutsideChroot("/boot/ostree") + + status := &rpmostreeclient.Status{ + Deployments: []rpmostreeclient.Deployment{ + {OSName: "rhcos_4.22.0", Booted: true}, + {OSName: "rhcos_4.21.0", Booted: false}, + }, + } + mockRpm.EXPECT().QueryStatus().Return(status, nil).Times(1) + + entries := []os.DirEntry{ + idleCleanupTestDirEntry{name: "rhcos_4.22.0-aaa", isDir: true}, // matches deployed, keep + idleCleanupTestDirEntry{name: "rhcos_4.21.0-bbb", isDir: true}, // matches deployed, keep + idleCleanupTestDirEntry{name: "rhcos-old-ccc", isDir: true}, // orphaned, remove + } + mockOps.EXPECT().ReadDir(bootOstreePath).Return(entries, nil).Times(1) + mockOps.EXPECT().RemoveAllFiles(filepath.Join(bootOstreePath, "rhcos-old-ccc")).Return(nil).Times(1) + + err := removeOrphanedBootEntries(logger, mockOps, mockRpm) + assert.NoError(t, err) +}