diff --git a/Makefile b/Makefile
index 7faabad8..3ab54e3e 100644
--- a/Makefile
+++ b/Makefile
@@ -12,3 +12,9 @@ storage:
 
 backend:
 	docker compose up api email_worker bat_worker asynqmon
+
+# Bring up the API together with the logging stack (Alloy -> Loki -> Grafana).
+# Declared phony because a monitoring/ directory exists in the repository.
+.PHONY: monitoring
+monitoring:
+	docker compose up api alloy loki grafana
\ No newline at end of file
diff --git a/apps/api/internal/api/api.go b/apps/api/internal/api/api.go
index cfd3d6a7..27bc9213 100644
--- a/apps/api/internal/api/api.go
+++ b/apps/api/internal/api/api.go
@@ -12,6 +12,7 @@ import (
 	"github.com/go-chi/cors"
 	"github.com/hibiken/asynq"
 	"github.com/rs/zerolog/log"
+	"github.com/swamphacks/core/apps/api/internal/api/handlers/grafana"
 	mw "github.com/swamphacks/core/apps/api/internal/api/middleware"
 	"github.com/swamphacks/core/apps/api/internal/config"
 	"github.com/swamphacks/core/apps/api/internal/database"
@@ -162,6 +163,10 @@ func Run() {
 		}, nil
 	})
 
+	// Admin-only reverse proxy to Grafana; forwards the authenticated user ID via X-WEBAUTH-USER.
+	grafanaHandler := grafana.NewHandler(logger)
+	grafana.RegisterRoutes(grafanaHandler, mw, r)
+
 	logger.Info().Msgf("API listening on port %s", config.Port)
 	if err := http.ListenAndServe(":"+config.Port, r); err != nil {
 		log.Fatal().Msg("Failed to start server.")
diff --git a/apps/api/internal/api/handlers/grafana/http.go b/apps/api/internal/api/handlers/grafana/http.go
new file mode 100644
index 00000000..a419c6f4
--- /dev/null
+++ b/apps/api/internal/api/handlers/grafana/http.go
@@ -0,0 +1,73 @@
+package grafana
+
+import (
+	"net/http"
+	"net/http/httputil"
+	"net/url"
+
+	"github.com/go-chi/chi/v5"
+	"github.com/rs/zerolog"
+	"github.com/swamphacks/core/apps/api/internal/api/middleware"
+	"github.com/swamphacks/core/apps/api/internal/database/sqlc"
+)
+
+const (
+	// grafanaUpstream is the address the reverse proxy forwards to.
+	grafanaUpstream = "http://grafana:3000"
+
+	// webAuthUserHeader carries the authenticated identity to Grafana. It must
+	// match GF_AUTH_PROXY_HEADER_NAME in docker-compose.yml.
+	webAuthUserHeader = "X-WEBAUTH-USER"
+)
+
+// handler holds the dependencies of the Grafana reverse-proxy routes.
+type handler struct {
+	logger zerolog.Logger
+}
+
+// NewHandler returns a handler whose logger is tagged with
+// handler=GrafanaHandler.
+func NewHandler(logger zerolog.Logger) *handler {
+	return &handler{
+		logger: logger.With().Str("handler", "GrafanaHandler").Logger(),
+	}
+}
+
+// RegisterRoutes mounts an admin-only reverse proxy to Grafana on /grafana and
+// /grafana/*. Requests pass through RequireAuth and RequireRoles(admin) and are
+// then forwarded with the caller's user ID in X-WEBAUTH-USER.
+//
+// If the upstream URL cannot be parsed the error is logged and no routes are
+// registered.
+func RegisterRoutes(grafanaHandler *handler, mw *middleware.Middleware, router *chi.Mux) {
+	grafanaURL, err := url.Parse(grafanaUpstream)
+	if err != nil {
+		grafanaHandler.logger.Error().Err(err).Msg("Failed to parse grafana URL")
+		return
+	}
+	grafanaProxy := httputil.NewSingleHostReverseProxy(grafanaURL)
+
+	proxy := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// Grafana trusts this header unconditionally, so drop whatever the client
+		// sent before deciding what identity (if any) to forward.
+		r.Header.Del(webAuthUserHeader)
+
+		userCtx, ok := r.Context().Value(middleware.UserContextKey).(*middleware.UserContext)
+		if !ok || userCtx == nil {
+			grafanaHandler.logger.Warn().Msg("Grafana proxy request without user context")
+			http.Error(w, http.StatusText(http.StatusUnauthorized), http.StatusUnauthorized)
+			return
+		}
+		r.Header.Set(webAuthUserHeader, userCtx.UserID.String())
+
+		grafanaProxy.ServeHTTP(w, r)
+	})
+
+	// Named "guarded" rather than "handler" to avoid shadowing the handler type.
+	guarded := mw.Auth.RequireAuth(
+		mw.Auth.RequireRoles([]sqlc.UserRole{sqlc.UserRoleAdmin})(proxy),
+	)
+
+	router.Handle("/grafana", guarded)
+	router.Handle("/grafana/*", guarded)
+}
diff --git a/apps/api/internal/logger/logger.go b/apps/api/internal/logger/logger.go
index 2260e2d6..6e6a2fbc 100644
--- a/apps/api/internal/logger/logger.go
+++ b/apps/api/internal/logger/logger.go
@@ -3,7 +3,6 @@ package logger
 import (
 	"os"
 	"runtime/debug"
-	"time"
 
 	"github.com/rs/zerolog"
 	"github.com/rs/zerolog/log"
@@ -12,10 +11,9 @@ import (
 func New() zerolog.Logger {
 	buildInfo, _ := debug.ReadBuildInfo()
 
-	logger := zerolog.New(zerolog.ConsoleWriter{
-		Out:        os.Stderr,
-		TimeFormat: time.RFC3339,
-	}).Level(zerolog.TraceLevel).
+	// Write raw JSON (zerolog's default encoding) instead of console-formatted
+	// text so the Alloy pipeline's stage.json can extract level, message, caller.
+	logger := zerolog.New(os.Stderr).Level(zerolog.TraceLevel).
 		With().
 		Timestamp().
 		Caller().
diff --git a/docker-compose.yml b/docker-compose.yml
index e0a3a6b1..db94ac8a 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -87,6 +87,46 @@ services:
       retries: 5
       start_period: 5s
 
+  loki:
+    image: grafana/loki:3.7.1
+    container_name: loki
+    command: "-config.file=/etc/loki/config.yml"
+    volumes:
+      - ./monitoring/loki-config.yml:/etc/loki/config.yml:ro
+    expose:
+      - "3100"
+
+  alloy:
+    image: grafana/alloy:v1.15.0
+    container_name: alloy
+    command: run /etc/alloy/config.alloy
+    volumes:
+      - ./monitoring/config.alloy:/etc/alloy/config.alloy:ro
+      - /var/run/docker.sock:/var/run/docker.sock
+    depends_on:
+      - loki
+
+  grafana:
+    image: grafana/grafana:12.4.2
+    container_name: grafana
+    env_file: "./monitoring/.env"
+    environment:
+      - GF_PATHS_PROVISIONING=/etc/grafana/provisioning
+      # Grafana trusts X-WEBAUTH-USER as set by the API's /grafana reverse proxy.
+      # Port 3000 is only exposed (not published), so access goes through that proxy.
+      - GF_AUTH_PROXY_ENABLED=true
+      - GF_AUTH_PROXY_HEADER_NAME=X-WEBAUTH-USER
+      - GF_USERS_AUTO_ASSIGN_ORG_ROLE=Admin
+      - GF_SERVER_ROOT_URL=/grafana
+      - GF_SERVER_SERVE_FROM_SUB_PATH=true
+    volumes:
+      - ./monitoring/provisioning:/etc/grafana/provisioning
+      - grafana_data:/var/lib/grafana
+    expose:
+      - "3000"
+    depends_on:
+      - loki
+
   # web-devcontainer:
   #   build:
   #     context: ./.devcontainer/web
@@ -106,5 +146,4 @@ services:
 
 volumes:
   postgres_data:
-
-
+  grafana_data:
diff --git a/monitoring/config.alloy b/monitoring/config.alloy
new file mode 100644
index 00000000..6c913c75
--- /dev/null
+++ b/monitoring/config.alloy
@@ -0,0 +1,57 @@
+discovery.docker "linux" {
+  host = "unix:///var/run/docker.sock"
+}
+
+// Only .rules is consumed (by loki.source.docker below), so no targets are passed in.
+discovery.relabel "logs_integrations_docker" {
+  targets = []
+
+  rule {
+    source_labels = ["__meta_docker_container_name"]
+    regex = "/(.*)"
+    target_label = "container_name"
+  }
+
+  rule {
+    target_label = "instance"
+    replacement = constants.hostname
+  }
+}
+
+loki.source.docker "containers" {
+  host = "unix:///var/run/docker.sock"
+  targets = discovery.docker.linux.targets
+  relabel_rules = discovery.relabel.logs_integrations_docker.rules
+  forward_to = [loki.process.logs.receiver]
+}
+
+loki.process "logs" {
+  forward_to = [loki.write.default.receiver]
+
+  // For core-api containers, parse the JSON log line and promote level to a label.
+  stage.match {
+    selector = "{container_name=~\"core-api.*\"}"
+
+    stage.json {
+      expressions = {
+        level = "level",
+        message = "message",
+        caller = "caller",
+        go_version = "go_version",
+        pid = "pid",
+      }
+    }
+
+    stage.labels {
+      values = {
+        level = "",
+      }
+    }
+  }
+}
+
+loki.write "default" {
+  endpoint {
+    url = "http://loki:3100/loki/api/v1/push"
+  }
+}
\ No newline at end of file
diff --git a/monitoring/loki-config.yml b/monitoring/loki-config.yml
new file mode 100644
index 00000000..ae3c8e3b
--- /dev/null
+++ b/monitoring/loki-config.yml
@@ -0,0 +1,30 @@
+auth_enabled: false
+
+server:
+  http_listen_port: 3100
+
+limits_config:
+  allow_structured_metadata: true
+
+# NOTE: docker-compose.yml mounts no volume at /tmp/loki, so stored logs do not
+# survive the loki container being recreated.
+common:
+  path_prefix: /tmp/loki
+  storage:
+    filesystem:
+      chunks_directory: /tmp/loki/chunks
+      rules_directory: /tmp/loki/rules
+  replication_factor: 1
+  ring:
+    kvstore:
+      store: inmemory
+
+schema_config:
+  configs:
+    - from: 2026-01-01
+      store: tsdb
+      object_store: filesystem
+      schema: v13
+      index:
+        prefix: index_
+        period: 24h
\ No newline at end of file
diff --git a/monitoring/provisioning/alerting/alert-rules.yaml b/monitoring/provisioning/alerting/alert-rules.yaml
new file mode 100644
index 00000000..3ebc7515
--- /dev/null
+++ b/monitoring/provisioning/alerting/alert-rules.yaml
@@ -0,0 +1,90 @@
+apiVersion: 1
+
+groups:
+  - name: core-api-alerts
+    folder: Services
+    interval: 1m
+
+    rules:
+      - uid: core_api_errors
+        title: Core API errors detected
+        condition: C
+
+        data:
+          - refId: A
+            queryType: range
+            relativeTimeRange:
+              from: 300
+              to: 0
+            datasourceUid: loki_uid
+            model:
+              queryType: range
+              expr: count_over_time({container_name=~"core-api.*"} | json | level="error" [5m])
+              refId: A
+          - refId: reducer
+            queryType: expression
+            datasourceUid: __expr__
+            model:
+              conditions:
+                - evaluator:
+                    params:
+                      - 0
+                      - 0
+                    type: gt
+                  operator:
+                    type: and
+                  query:
+                    params: []
+                  reducer:
+                    params: []
+                    type: avg
+                  type: query
+              datasource:
+                name: Expression
+                type: __expr__
+                uid: __expr__
+              expression: A
+              intervalMs: 1000
+              maxDataPoints: 43200
+              reducer: last
+              refId: reducer
+              type: reduce
+          - refId: C
+            datasourceUid: __expr__
+            model:
+              conditions:
+                - evaluator:
+                    params:
+                      - 0
+                    type: gt
+                  operator:
+                    type: and
+                  query:
+                    params: []
+                  reducer:
+                    params: []
+                    type: last
+                  type: query
+              datasource:
+                type: __expr__
+                uid: __expr__
+              expression: reducer
+              intervalMs: 1000
+              maxDataPoints: 43200
+              refId: C
+              type: threshold
+
+        for: 1m
+
+        # The query returns no series when there are no error logs, so "no data"
+        # is the healthy state here and must not raise a NoData alert.
+        noDataState: OK
+        execErrState: Error
+
+        labels:
+          severity: warning
+          scope: global
+          alertname: core_api_errors
+
+        notification_settings:
+          receiver: discord
\ No newline at end of file
diff --git a/monitoring/provisioning/alerting/contact-points.yaml b/monitoring/provisioning/alerting/contact-points.yaml
new file mode 100644
index 00000000..c31f147a
--- /dev/null
+++ b/monitoring/provisioning/alerting/contact-points.yaml
@@ -0,0 +1,9 @@
+apiVersion: 1
+
+contactPoints:
+  - name: discord
+    receivers:
+      - uid: discord_webhook
+        type: discord
+        settings:
+          url: $DISCORD_WEBHOOK
\ No newline at end of file
diff --git a/monitoring/provisioning/alerting/notification-policies.yaml b/monitoring/provisioning/alerting/notification-policies.yaml
new file mode 100644
index 00000000..54e12948
--- /dev/null
+++ b/monitoring/provisioning/alerting/notification-policies.yaml
@@ -0,0 +1,8 @@
+apiVersion: 1
+
+policies:
+  - orgId: 1
+    receiver: discord
+    group_by: ['alertname']
+    routes:
+      - receiver: discord
\ No newline at end of file
diff --git a/monitoring/provisioning/datasources/loki.yaml b/monitoring/provisioning/datasources/loki.yaml
new file mode 100644
index 00000000..c2eaca6e
--- /dev/null
+++ b/monitoring/provisioning/datasources/loki.yaml
@@ -0,0 +1,10 @@
+apiVersion: 1
+
+datasources:
+  - name: Loki
+    type: loki
+    uid: loki_uid
+    orgId: 1
+    access: proxy
+    url: http://loki:3100
+    isDefault: true
\ No newline at end of file