diff --git a/go.mod b/go.mod index b5dea87..889361b 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,9 @@ module github.com/github/deployment-tracker go 1.25.4 require ( + github.com/bradleyfalzon/ghinstallation/v2 v2.17.0 github.com/prometheus/client_golang v1.23.2 + golang.org/x/time v0.14.0 k8s.io/api v0.35.0 k8s.io/apimachinery v0.35.0 k8s.io/client-go v0.35.0 @@ -11,7 +13,6 @@ require ( require ( github.com/beorn7/perks v1.0.1 // indirect - github.com/bradleyfalzon/ghinstallation/v2 v2.17.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/emicklei/go-restful/v3 v3.12.2 // indirect @@ -45,7 +46,6 @@ require ( golang.org/x/sys v0.38.0 // indirect golang.org/x/term v0.37.0 // indirect golang.org/x/text v0.31.0 // indirect - golang.org/x/time v0.14.0 // indirect google.golang.org/protobuf v1.36.8 // indirect gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect diff --git a/go.sum b/go.sum index a94e27b..52a5a1f 100644 --- a/go.sum +++ b/go.sum @@ -118,8 +118,6 @@ golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU= golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254= golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM= golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM= -golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= -golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ= diff --git a/internal/controller/controller.go b/internal/controller/controller.go index 9ac951b..1104cea 100644 --- a/internal/controller/controller.go +++ b/internal/controller/controller.go @@ -7,7 +7,6 @@ import ( "log/slog" "slices" "strings" - "sync" "time" "github.com/github/deployment-tracker/pkg/deploymentrecord" @@ -15,6 +14,7 @@ import ( "github.com/github/deployment-tracker/pkg/metrics" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" + amcache "k8s.io/apimachinery/pkg/util/cache" "k8s.io/client-go/metadata" corev1 "k8s.io/api/core/v1" @@ -37,6 +37,12 @@ const ( RuntimeRiskAnnotationKey = "github.com/runtime-risks" ) +type ttlCache interface { + Get(k any) (any, bool) + Set(k any, v any, ttl time.Duration) + Delete(k any) +} + // PodEvent represents a pod event to be processed. type PodEvent struct { Key string @@ -60,7 +66,7 @@ type Controller struct { // best effort cache to avoid redundant posts // post requests are idempotent, so if this cache fails due to // restarts or other events, nothing will break. - observedDeployments sync.Map + observedDeployments ttlCache } // New creates a new deployment tracker controller. @@ -96,12 +102,13 @@ func New(clientset kubernetes.Interface, metadataClient metadata.Interface, name } cntrl := &Controller{ - clientset: clientset, - metadataClient: metadataClient, - podInformer: podInformer, - workqueue: queue, - apiClient: apiClient, - cfg: cfg, + clientset: clientset, + metadataClient: metadataClient, + podInformer: podInformer, + workqueue: queue, + apiClient: apiClient, + cfg: cfg, + observedDeployments: amcache.NewExpiring(), } // Add event handlers to the informer @@ -395,6 +402,8 @@ func (c *Controller) deploymentExists(ctx context.Context, namespace, name strin // recordContainer records a single container's deployment info. func (c *Controller) recordContainer(ctx context.Context, pod *corev1.Pod, container corev1.Container, status, eventType string, runtimeRisks []deploymentrecord.RuntimeRisk) error { + var cacheKey string + dn := getARDeploymentName(pod, container, c.cfg.Template) digest := getContainerDigest(pod, container.Name) @@ -409,12 +418,11 @@ func (c *Controller) recordContainer(ctx context.Context, pod *corev1.Pod, conta return nil } - cacheKey := getCacheKey(dn, digest) - // Check if we've already recorded this deployment switch status { case deploymentrecord.StatusDeployed: - if _, exists := c.observedDeployments.Load(cacheKey); exists { + cacheKey = getCacheKey(EventCreated, dn, digest) + if _, exists := c.observedDeployments.Get(cacheKey); exists { slog.Debug("Deployment already observed, skipping post", "deployment_name", dn, "digest", digest, @@ -422,9 +430,9 @@ func (c *Controller) recordContainer(ctx context.Context, pod *corev1.Pod, conta return nil } case deploymentrecord.StatusDecommissioned: - // For delete, check if we've seen it - if not, no need to decommission - if _, exists := c.observedDeployments.Load(cacheKey); !exists { - slog.Debug("Deployment not in cache, skipping decommission", + cacheKey = getCacheKey(EventDeleted, dn, digest) + if _, exists := c.observedDeployments.Get(cacheKey); exists { + slog.Debug("Deployment already deleted, skipping post", "deployment_name", dn, "digest", digest, ) @@ -488,8 +496,16 @@ func (c *Controller) recordContainer(ctx context.Context, pod *corev1.Pod, conta // Update cache after successful post switch status { case deploymentrecord.StatusDeployed: - c.observedDeployments.Store(cacheKey, true) + cacheKey = getCacheKey(EventCreated, dn, digest) + c.observedDeployments.Set(cacheKey, true, 2*time.Minute) + // If there was a previous delete event, remove that + cacheKey = getCacheKey(EventDeleted, dn, digest) + c.observedDeployments.Delete(cacheKey) case deploymentrecord.StatusDecommissioned: + cacheKey = getCacheKey(EventDeleted, dn, digest) + c.observedDeployments.Set(cacheKey, true, 2*time.Minute) + // If there was a previous created event, remove that + cacheKey = getCacheKey(EventCreated, dn, digest) c.observedDeployments.Delete(cacheKey) default: return fmt.Errorf("invalid status: %s", status) @@ -586,8 +602,8 @@ func (c *Controller) getOwnerMetadata(ctx context.Context, namespace string, own return obj, nil } -func getCacheKey(dn, digest string) string { - return dn + "||" + digest +func getCacheKey(ev, dn, digest string) string { + return ev + "||" + dn + "||" + digest } // createInformerFactory creates a shared informer factory with the given resync period.