diff --git a/pkg/metricshandler/handler.go b/pkg/metricshandler/handler.go index 7167885bb3..6d0c4ae031 100644 --- a/pkg/metricshandler/handler.go +++ b/pkg/metricshandler/handler.go @@ -26,6 +26,7 @@ import ( "kubeops.dev/ui-server/pkg/metricsstore" "github.com/prometheus/client_golang/prometheus/promhttp" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/apiserver/pkg/server/mux" "k8s.io/klog/v2" "k8s.io/kube-state-metrics/v2/pkg/metric" @@ -92,25 +93,27 @@ func (h *MetricsHandler) Install(c *mux.PathRecorderMux) { func StartMetricsCollector(mgr manager.Manager) func(ctx context.Context) error { return func(ctx context.Context) error { klog.Infoln("Starts the Metrics Collector") - for { + // Recollect every MetricsRefreshPeriod until the manager shuts down. Using + // wait.UntilWithContext ensures we honor ctx cancellation and always wait a + // full period between runs, even when a collection fails (a bare "continue" + // here previously busy-looped on persistent errors). + wait.UntilWithContext(ctx, func(ctx context.Context) { collector := &Collector{ kc: mgr.GetClient(), opaInstalled: graph.OPAInstalled.Load(), scannerInstalled: graph.ScannerInstalled.Load(), } collector.init() - err := collector.collectMetrics() - if err != nil { + if err := collector.collectMetrics(); err != nil { klog.Errorf("Error occurred while collecting metrics : %s \n", err.Error()) - continue + return } mu.Lock() store = collector.store mu.Unlock() - - time.Sleep(MetricsRefreshPeriod) - } + }, MetricsRefreshPeriod) + return nil } } diff --git a/pkg/metricsstore/store.go b/pkg/metricsstore/store.go index e9b1c8229f..ad7ac8e795 100755 --- a/pkg/metricsstore/store.go +++ b/pkg/metricsstore/store.go @@ -52,6 +52,12 @@ func (s *MetricsStore) Add(family ...*metric.Family) { // help text of each metric family. func (s *MetricsStore) WriteAll(w io.Writer) error { for i, help := range s.headers { + // Defensive: headers and families are expected to be the same length (one + // family per header), but guard against a short families slice so a partially + // populated store cannot panic the /metrics handler. + if i >= len(s.families) { + break + } _, err := w.Write([]byte(help)) if err != nil { return err