From 954656238e7ba02748640e172615257451b6e2cf Mon Sep 17 00:00:00 2001 From: Tamal Saha Date: Thu, 25 Jun 2026 19:56:01 +0600 Subject: [PATCH] metricshandler: stop busy-looping and honor ctx in collector StartMetricsCollector ran an unbounded for-loop that: - called "continue" on a collection error, skipping the time.Sleep and busy-looping (hammering the API server) whenever collection kept failing; - never observed the passed ctx, so the goroutine kept running after the manager shut down. Replace the loop with wait.UntilWithContext, which waits a full MetricsRefreshPeriod between runs regardless of success/failure and returns when ctx is cancelled. Also harden MetricsStore.WriteAll with a bounds check so a partially populated store (fewer families than headers) cannot panic the /metrics handler with an index-out-of-range. Signed-off-by: Tamal Saha --- pkg/metricshandler/handler.go | 17 ++++++++++------- pkg/metricsstore/store.go | 6 ++++++ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/pkg/metricshandler/handler.go b/pkg/metricshandler/handler.go index 7167885bb3..6d0c4ae031 100644 --- a/pkg/metricshandler/handler.go +++ b/pkg/metricshandler/handler.go @@ -26,6 +26,7 @@ import ( "kubeops.dev/ui-server/pkg/metricsstore" "github.com/prometheus/client_golang/prometheus/promhttp" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/apiserver/pkg/server/mux" "k8s.io/klog/v2" "k8s.io/kube-state-metrics/v2/pkg/metric" @@ -92,25 +93,27 @@ func (h *MetricsHandler) Install(c *mux.PathRecorderMux) { func StartMetricsCollector(mgr manager.Manager) func(ctx context.Context) error { return func(ctx context.Context) error { klog.Infoln("Starts the Metrics Collector") - for { + // Recollect every MetricsRefreshPeriod until the manager shuts down. Using + // wait.UntilWithContext ensures we honor ctx cancellation and always wait a + // full period between runs, even when a collection fails (a bare "continue" + // here previously busy-looped on persistent errors). + wait.UntilWithContext(ctx, func(ctx context.Context) { collector := &Collector{ kc: mgr.GetClient(), opaInstalled: graph.OPAInstalled.Load(), scannerInstalled: graph.ScannerInstalled.Load(), } collector.init() - err := collector.collectMetrics() - if err != nil { + if err := collector.collectMetrics(); err != nil { klog.Errorf("Error occurred while collecting metrics : %s \n", err.Error()) - continue + return } mu.Lock() store = collector.store mu.Unlock() - - time.Sleep(MetricsRefreshPeriod) - } + }, MetricsRefreshPeriod) + return nil } } diff --git a/pkg/metricsstore/store.go b/pkg/metricsstore/store.go index e9b1c8229f..ad7ac8e795 100755 --- a/pkg/metricsstore/store.go +++ b/pkg/metricsstore/store.go @@ -52,6 +52,12 @@ func (s *MetricsStore) Add(family ...*metric.Family) { // help text of each metric family. func (s *MetricsStore) WriteAll(w io.Writer) error { for i, help := range s.headers { + // Defensive: headers and families are expected to be the same length (one + // family per header), but guard against a short families slice so a partially + // populated store cannot panic the /metrics handler. + if i >= len(s.families) { + break + } _, err := w.Write([]byte(help)) if err != nil { return err