diff --git a/README.md b/README.md index c8268f7..45fd68c 100644 --- a/README.md +++ b/README.md @@ -36,11 +36,12 @@ deployment records to GitHub's artifact metadata API. ## Command Line Options -| Flag | Description | Default | -|---------------|--------------------------------------|--------------------------------------------| -| `-kubeconfig` | Path to kubeconfig file | Uses in-cluster config or `~/.kube/config` | -| `-namespace` | Namespace to monitor (empty for all) | `""` (all namespaces) | -| `-workers` | Number of worker goroutines | `2` | +| Flag | Description | Default | +|-----------------|--------------------------------------|--------------------------------------------| +| `-kubeconfig` | Path to kubeconfig file | Uses in-cluster config or `~/.kube/config` | +| `-namespace` | Namespace to monitor (empty for all) | `""` (all namespaces) | +| `-workers` | Number of worker goroutines | `2` | +| `-metrics-port` | Port number for Prometheus metrics | `9090` | ## Environment Variables @@ -67,14 +68,6 @@ The `DN_TEMPLATE` supports the following placeholders: - `{{deploymentName}}` - Name of the owning Deployment - `{{containerName}}` - Container name -## Output Format - -``` -[2024-01-15T10:30:00Z] OK CREATED name=nginx deployment_name=default/nginx/nginx digest=sha256:abc123... status=deployed -[2024-01-15T10:30:10Z] OK DELETED name=nginx deployment_name=default/nginx/nginx digest=sha256:abc123... 
status=decommissioned -[2024-01-15T10:30:15Z] FAILED CREATED name=myapp deployment_name=default/myapp/app error=connection refused -``` - ## Kubernetes Deployment A complete deployment manifest is provided in `deploy/manifest.yaml` @@ -86,23 +79,6 @@ which includes: - **ClusterRoleBinding**: Binds the ServiceAccount to the ClusterRole - **Deployment**: Runs the controller with security hardening -### Deploy to Kubernetes - -``` -# Update the image in the manifest, then apply -kubectl apply -f deploy/manifest.yaml -``` - -### View Logs - -```bash -# Follow logs from the controller -kubectl logs -f -n deployment-tracker deployment/deployment-tracker - -# View recent logs -kubectl logs -n deployment-tracker deployment/deployment-tracker --tail=100 -``` - ### Verify Deployment ```bash @@ -151,3 +127,30 @@ If you only need to monitor a single namespace, you can modify the manifest to u │ │ │ └───────────┘ │ │ │ └─────────────────┘ └─────────────────┘ └─────────────────┘ ``` + +## Metrics + +The deployment tracker provides Prometheus metrics, exposed via `http` +at `:9090/metrics`. The port can be configured with the +`-metrics-port` flag (`9090` is the default). + +The metrics exposed beyond the default Prometheus metrics are: + +* `deptracker_events_processed_ok`: the total number of successful + events processed from the k8s API server. The metric is tagged with the + event type (`CREATED`/`DELETED`). +* `deptracker_events_processed_failed`: the total number of failed + events processed from the k8s API server. The metric is tagged with the + event type (`CREATED`/`DELETED`). +* `deptracker_events_processed_timer`: the processing time for each + event. The metric is tagged with the status of the event processing + (`ok`/`failed`). +* `deptracker_post_deployment_record_timer`: the duration of the + outgoing HTTP POST to upload the deployment record. +* `deptracker_post_record_ok`: the number of successful deployment + record uploads. 
+* `deptracker_post_record_soft_fail`: the number of recoverable failed + attempts to upload the deployment record. +* `deptracker_post_record_hard_fail`: the number of failures to + persist a record via the HTTP API (either an irrecoverable error or + all retries are exhausted). diff --git a/cmd/deployment-tracker/main.go b/cmd/deployment-tracker/main.go index 00d389a..05c4747 100644 --- a/cmd/deployment-tracker/main.go +++ b/cmd/deployment-tracker/main.go @@ -3,13 +3,17 @@ package main import ( "context" "flag" + "log" "log/slog" + "net/http" "os" "os/signal" "syscall" + "time" "github.com/github/deployment-tracker/internal/controller" + "github.com/prometheus/client_golang/prometheus/promhttp" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" @@ -28,17 +32,20 @@ func getEnvOrDefault(key, defaultValue string) string { func main() { var ( - kubeconfig string - namespace string - workers int + kubeconfig string + namespace string + workers int + metricsPort string ) flag.StringVar(&kubeconfig, "kubeconfig", "", "path to kubeconfig file (uses in-cluster config if not set)") flag.StringVar(&namespace, "namespace", "", "namespace to monitor (empty for all namespaces)") flag.IntVar(&workers, "workers", 2, "number of worker goroutines") + flag.StringVar(&metricsPort, "metrics-port", "9090", "port to listen to for metrics") flag.Parse() // init logging + log.SetFlags(log.LstdFlags | log.Lshortfile | log.LUTC) opts := slog.HandlerOptions{Level: slog.LevelInfo} slog.SetDefault(slog.New(slog.NewJSONHandler(os.Stdout, &opts))) @@ -79,6 +86,26 @@ func main() { os.Exit(1) } + // Start the metrics server + go func() { + var mm = http.NewServeMux() + mm.Handle("/metrics", promhttp.Handler()) + + var promSrv = &http.Server{ + Addr: ":" + metricsPort, + ReadTimeout: 10 * time.Second, + WriteTimeout: 10 * time.Second, + ReadHeaderTimeout: 10 * time.Second, + Handler: mm, + } + slog.Info("starting Prometheus metrics server", + "url", 
promSrv.Addr) + if err := promSrv.ListenAndServe(); err != nil { + slog.Error("failed to start metrics server", + "error", err) + } + }() + ctx, cancel := context.WithCancel(context.Background()) sigCh := make(chan os.Signal, 1) diff --git a/go.mod b/go.mod index 4d948c4..888b81d 100644 --- a/go.mod +++ b/go.mod @@ -3,12 +3,15 @@ module github.com/github/deployment-tracker go 1.25.4 require ( + github.com/prometheus/client_golang v1.23.2 k8s.io/api v0.34.3 k8s.io/apimachinery v0.34.3 k8s.io/client-go v0.34.3 ) require ( + github.com/beorn7/perks v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/emicklei/go-restful/v3 v3.12.2 // indirect github.com/fxamacker/cbor/v2 v2.9.0 // indirect @@ -28,17 +31,20 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/prometheus/client_model v0.6.2 // indirect + github.com/prometheus/common v0.66.1 // indirect + github.com/prometheus/procfs v0.16.1 // indirect github.com/spf13/pflag v1.0.6 // indirect github.com/x448/float16 v0.8.4 // indirect go.yaml.in/yaml/v2 v2.4.2 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect - golang.org/x/net v0.38.0 // indirect - golang.org/x/oauth2 v0.27.0 // indirect - golang.org/x/sys v0.31.0 // indirect - golang.org/x/term v0.30.0 // indirect - golang.org/x/text v0.23.0 // indirect + golang.org/x/net v0.43.0 // indirect + golang.org/x/oauth2 v0.30.0 // indirect + golang.org/x/sys v0.35.0 // indirect + golang.org/x/term v0.34.0 // indirect + golang.org/x/text v0.28.0 // indirect golang.org/x/time v0.9.0 // indirect - google.golang.org/protobuf v1.36.5 // indirect + google.golang.org/protobuf v1.36.8 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index 1a40412..39fb81f 100644 
--- a/go.sum +++ b/go.sum @@ -1,3 +1,7 @@ +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= @@ -35,6 +39,8 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= @@ -42,6 +48,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/mailru/easyjson v0.7.7 
h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -60,6 +68,14 @@ github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs= +github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= +github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= +github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= @@ -73,8 +89,8 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 
-github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= -github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= @@ -94,38 +110,38 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8= -golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= -golang.org/x/oauth2 v0.27.0 h1:da9Vo7/tDv5RH/7nZDz1eMGS/q1Vv1N/7FCrBhI9I3M= -golang.org/x/oauth2 v0.27.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= +golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= +golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= +golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= +golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik= -golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y= -golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g= +golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= +golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/term v0.34.0 h1:O/2T7POpk0ZZ7MAzMeWFSg6S5IpWd/RXDlM9hgM3DR4= +golang.org/x/term v0.34.0/go.mod h1:5jC53AEywhIVebHgPVeg0mj8OD3VO9OzclacVrqpaAw= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= -golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= +golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= +golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod 
h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ= -golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0= +golang.org/x/tools v0.35.0 h1:mBffYraMEf7aa0sB+NuKnuCy8qI/9Bughn8dC2Gu5r0= +golang.org/x/tools v0.35.0/go.mod h1:NKdj5HkL/73byiZSJjqJgKn3ep7KjFkBOkR/Hps3VPw= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM= -google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= +google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= diff --git a/internal/controller/controller.go b/internal/controller/controller.go index 24d65ce..9a0495c 100644 --- a/internal/controller/controller.go +++ b/internal/controller/controller.go @@ -9,6 +9,7 @@ import ( "github.com/github/deployment-tracker/pkg/deploymentrecord" "github.com/github/deployment-tracker/pkg/image" + "github.com/github/deployment-tracker/pkg/metrics" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/runtime" @@ -123,7 +124,11 @@ func New(clientset kubernetes.Interface, namespace 
string, cfg *Config) *Control return } - // Only process if pod just became running + // Only process if pod just became running. + // We need to process this as often when a container + // is created, the spec does not contain the digest + // so we need to wait for the status field to be + // populated from where we can get the digest. if oldPod.Status.Phase != corev1.PodRunning && newPod.Status.Phase == corev1.PodRunning { key, err := cache.MetaNamespaceKeyFunc(newObj) @@ -221,11 +226,19 @@ func (c *Controller) processNextItem(ctx context.Context) bool { } defer c.workqueue.Done(event) + start := time.Now() err := c.processEvent(ctx, event) + dur := time.Since(start) + if err == nil { + metrics.EventsProcessedOk.WithLabelValues(event.EventType).Inc() + metrics.EventsProcessedTimer.WithLabelValues("ok").Observe(dur.Seconds()) + c.workqueue.Forget(event) return true } + metrics.EventsProcessedTimer.WithLabelValues("failed").Observe(dur.Seconds()) + metrics.EventsProcessedFailed.WithLabelValues(event.EventType).Inc() // Requeue on error with rate limiting slog.Error("Failed to process event, requeuing", diff --git a/pkg/deploymentrecord/client.go b/pkg/deploymentrecord/client.go index be3c215..d71cf13 100644 --- a/pkg/deploymentrecord/client.go +++ b/pkg/deploymentrecord/client.go @@ -6,8 +6,11 @@ import ( "encoding/json" "errors" "fmt" + "log/slog" "net/http" "time" + + "github.com/github/deployment-tracker/pkg/metrics" ) // ClientOption is a function that configures the Client. @@ -79,10 +82,12 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { bodyReader := bytes.NewReader(body) var lastErr error - for attempt := 0; attempt <= c.retries; attempt++ { + // The first attempt is not a retry! 
+ for attempt := range c.retries + 1 { if attempt > 0 { // Wait before retry with exponential backoff - time.Sleep(time.Duration(attempt*100) * time.Millisecond) + time.Sleep(time.Duration(attempt*100) * + time.Millisecond) } // Reset reader position for retries @@ -98,15 +103,24 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { req.Header.Set("Authorization", "Bearer "+c.apiToken) } + start := time.Now() resp, err := c.httpClient.Do(req) + dur := time.Since(start) + metrics.PostDeploymentRecordTimer.Observe(dur.Seconds()) if err != nil { - lastErr = fmt.Errorf("request failed: %w", err) + lastErr = fmt.Errorf("post request failed: %w", err) + + slog.Warn("recoverable error, re-trying", + "attempt", attempt, + "retries", c.retries, + "error", lastErr) + metrics.PostDeploymentRecordSoftFail.Inc() continue } - resp.Body.Close() if resp.StatusCode >= 200 && resp.StatusCode < 300 { + metrics.PostDeploymentRecordOk.Inc() return nil } @@ -115,9 +129,18 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { // Don't retry on client errors (4xx) except for 429 // (rate limit) if resp.StatusCode >= 400 && resp.StatusCode < 500 && resp.StatusCode != 429 { + metrics.PostDeploymentRecordHardFail.Inc() + slog.Error("irrecoverable error, aborting", + "attempt", attempt, + "error", lastErr) return lastErr } + metrics.PostDeploymentRecordSoftFail.Inc() } + metrics.PostDeploymentRecordHardFail.Inc() + slog.Error("all retries exhausted", + "count", c.retries, + "error", lastErr) return fmt.Errorf("all retries exhausted: %w", lastErr) } diff --git a/pkg/metrics/prom.go b/pkg/metrics/prom.go new file mode 100644 index 0000000..cdcb51d --- /dev/null +++ b/pkg/metrics/prom.go @@ -0,0 +1,67 @@ +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +var ( + //nolint: revive + EventsProcessedOk = promauto.NewCounterVec( + 
prometheus.CounterOpts{ + Name: "deptracker_events_processed_ok", + Help: "The total number of successful events", + }, + []string{"event_type"}, + ) + + //nolint: revive + EventsProcessedFailed = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "deptracker_events_processed_failed", + Help: "The total number of failed events", + }, + []string{"event_type"}, + ) + + //nolint: revive + EventsProcessedTimer = promauto.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "deptracker_events_processed_timer", + Help: "The duration (seconds) for processing k8s events", + }, + []string{"status"}, + ) + + //nolint: revive + PostDeploymentRecordTimer = promauto.NewHistogram( + prometheus.HistogramOpts{ + Name: "deptracker_post_deployment_record_timer", + Help: "The duration (seconds) for posting data to the GitHub API", + }, + ) + + //nolint: revive + PostDeploymentRecordOk = promauto.NewCounter( + prometheus.CounterOpts{ + Name: "deptracker_post_record_ok", + Help: "The total number of successful posts", + }, + ) + + //nolint: revive + PostDeploymentRecordSoftFail = promauto.NewCounter( + prometheus.CounterOpts{ + Name: "deptracker_post_record_soft_fail", + Help: "The total number of soft (recoverable) post failures", + }, + ) + + //nolint: revive + PostDeploymentRecordHardFail = promauto.NewCounter( + prometheus.CounterOpts{ + Name: "deptracker_post_record_hard_fail", + Help: "The total number of hard post failures", + }, + ) +)