From 37f9b4b254da62c6f28f076571e6b71622f1315e Mon Sep 17 00:00:00 2001 From: binarybaron Date: Fri, 5 Jun 2026 12:52:09 +0200 Subject: [PATCH 1/4] feat(orchestrator): ship per-container metrics via prometheus Adds optional cadvisor + prometheus-agent services to the generated docker-compose.yml, gated by METRICS_REMOTE_WRITE_URL. cadvisor exposes per-container cpu/memory/pids/network/fs metrics; the prometheus agent scrapes it locally and remote_writes to a central endpoint, mirroring the push-only model the asb hosts already use for logs. The bearer token and host label are reused from the Promtail config so metrics and logs authenticate identically and share one Grafana selector. --- swap-orchestrator/README.md | 2 + swap-orchestrator/src/compose.rs | 105 +++++++++++++++++++++++++++++++ swap-orchestrator/src/images.rs | 6 ++ swap-orchestrator/src/main.rs | 66 ++++++++++++++++++- swap-orchestrator/tests/spec.rs | 77 ++++++++++++++++++++--- 5 files changed, 243 insertions(+), 13 deletions(-) diff --git a/swap-orchestrator/README.md b/swap-orchestrator/README.md index eebd9841da..7d48d3862f 100644 --- a/swap-orchestrator/README.md +++ b/swap-orchestrator/README.md @@ -42,6 +42,8 @@ Run the command below to start the wizard. It’ll guide you through a bunch of To also ship the `asb` tracing logs and the `bitcoind`/`monerod`/`electrs` container logs to a Loki endpoint, set `PROMTAIL_LOKI_PUSH_URL`, `PROMTAIL_LOKI_PUSH_TOKEN`, and `PROMTAIL_INSTANCE` before running the orchestrator — this adds `promtail` and `docker-socket-proxy` services to the generated `docker-compose.yml`. All streams are labelled with `host=`; the daemon logs additionally carry `job=node` and `container=`. +To additionally ship per-container resource metrics (cpu, memory, pids, network, fs) to a Prometheus `remote_write` endpoint, set `METRICS_REMOTE_WRITE_URL` — this adds `cadvisor` and `prometheus-agent` services to the generated `docker-compose.yml`. 
Metrics reuse the Promtail bearer token and `host=` label (so metrics and logs share one Grafana selector), so the `PROMTAIL_*` variables must be set as well. The central collector (`scripts/logging`) gates the metrics endpoint with the same token as the Loki push, behind the same Cloudflare tunnel. + If the `asb`/`asb-controller`/`rendezvous-node` images are built from a **private** GitHub repository, set `GH_TOKEN` to a token with read access before running the orchestrator. The token is inlined into the build-context URL in the generated `docker-compose.yml` so `docker compose build` can clone the repository. ```bash diff --git a/swap-orchestrator/src/compose.rs b/swap-orchestrator/src/compose.rs index ab917061ea..de786e68c0 100644 --- a/swap-orchestrator/src/compose.rs +++ b/swap-orchestrator/src/compose.rs @@ -11,6 +11,7 @@ pub const ASB_DATA_DIR: &str = "/asb-data"; pub const ASB_CONFIG_FILE: &str = "config.toml"; pub const DOCKER_COMPOSE_FILE: &str = "./docker-compose.yml"; pub const PROMTAIL_CONFIG_FILE: &str = "./promtail.yml"; +pub const PROMETHEUS_CONFIG_FILE: &str = "./prometheus.yml"; pub struct OrchestratorInput { pub ports: OrchestratorPorts, @@ -20,6 +21,7 @@ pub struct OrchestratorInput { pub want_tor: bool, pub cloudflared: Option<CloudflaredConfig>, pub promtail: Option<PromtailConfig>, + pub metrics: Option<MetricsConfig>, } /// Cloudflare Tunnel configuration. @@ -65,6 +67,29 @@ pub struct PromtailConfig { pub instance: String, } +/// Prometheus metrics-shipping configuration. +/// +/// When set, the orchestrator adds `cadvisor` and `prometheus-agent` services +/// to the compose file and writes a `prometheus.yml` next to +/// `docker-compose.yml`. cadvisor exposes per-container resource metrics; the +/// Prometheus agent scrapes it locally and `remote_write`s to a central +/// endpoint. The bearer token and host label are reused from [`PromtailConfig`] +/// so metrics and logs authenticate identically and share the same `host` +/// selector in Grafana. 
+#[derive(Clone)] +pub struct MetricsConfig { + /// Prometheus `remote_write` endpoint, e.g. + /// `https://asb-logs.example.com/api/v1/write`. + pub remote_write_url: String, + /// Bearer token presented to the endpoint. The same token Promtail uses for + /// the Loki push — the central gate authorizes both with one token. + pub token: String, + /// Short host identifier, exported as the `host` external label so a + /// deployment's metrics and logs select with the same query. Reused from + /// the Promtail instance. + pub instance: String, +} + pub struct OrchestratorDirectories { pub asb_data_dir: PathBuf, } @@ -87,6 +112,8 @@ pub struct OrchestratorImages { pub cloudflared: T, pub promtail: T, pub docker_socket_proxy: T, + pub cadvisor: T, + pub prometheus_agent: T, } pub struct OrchestratorPorts { @@ -366,6 +393,49 @@ fn build(input: OrchestratorInput) -> String { (String::new(), "") }; + let (metrics_segment, metrics_volume) = if input.metrics.is_some() { + // cadvisor reads cgroups/host paths read-only to expose per-container + // CPU/memory/PID/network/fs metrics. prometheus-agent runs in agent + // mode (no local query/storage beyond the WAL) and only scrapes + // cadvisor, then remote_writes to the central endpoint. The endpoint + // URL, bearer token and host label are baked into prometheus.yml. 
+ let metrics_segment = format!( + "\ + cadvisor: + container_name: cadvisor + {image_cadvisor} + restart: unless-stopped + privileged: true + devices: + - /dev/kmsg:/dev/kmsg + volumes: + - '/:/rootfs:ro' + - '/var/run:/var/run:ro' + - '/sys:/sys:ro' + - '/var/lib/docker/:/var/lib/docker:ro' + - '/dev/disk/:/dev/disk:ro' + expose: + - 8080 + prometheus-agent: + container_name: prometheus-agent + {image_prometheus_agent} + restart: unless-stopped + depends_on: + - cadvisor + volumes: + - '{prometheus_config_file}:/etc/prometheus/prometheus.yml:ro' + - 'prometheus-agent-data:/prometheus' + command: [\"--config.file=/etc/prometheus/prometheus.yml\", \"--agent\", \"--storage.agent.path=/prometheus\"]\ +", + image_cadvisor = input.images.cadvisor.to_image_attribute(), + image_prometheus_agent = input.images.prometheus_agent.to_image_attribute(), + prometheus_config_file = PROMETHEUS_CONFIG_FILE, + ); + (metrics_segment, "prometheus-agent-data:") + } else { + (String::new(), "") + }; + let (tor_segment, tor_volume) = if input.want_tor { // This image comes with an empty /etc/tor/, so this is the entire config let command_tor = command![ @@ -457,6 +527,7 @@ services: {tor_segment} {cloudflared_segment} {promtail_segment} + {metrics_segment} asb: container_name: asb {image_asb} @@ -510,6 +581,7 @@ volumes: rendezvous-data: {tor_volume} {promtail_volume} + {metrics_volume} ", port_monerod_rpc = input.ports.monerod_rpc, port_bitcoind_rpc = input.ports.bitcoind_rpc, @@ -627,6 +699,39 @@ scrape_configs: ) } +/// Builds the YAML body of `prometheus.yml` for the host's Prometheus agent. +/// +/// The agent scrapes the local cadvisor and `remote_write`s to the central +/// endpoint. Values from [`MetricsConfig`] are baked in directly. The `host` +/// external label matches the Promtail `host` label so a deployment's metrics +/// and logs select identically in Grafana. 
+pub fn build_prometheus_agent_yml(cfg: &MetricsConfig) -> String { + fn yaml_single_quote(value: &str) -> String { + format!("'{}'", value.replace('\'', "''")) + } + + format!( + "\ +global: + scrape_interval: 30s + external_labels: + host: {instance} + +scrape_configs: + - job_name: cadvisor + static_configs: + - targets: ['cadvisor:8080'] + +remote_write: + - url: {url} + bearer_token: {token} +", + instance = yaml_single_quote(&cfg.instance), + url = yaml_single_quote(&cfg.remote_write_url), + token = yaml_single_quote(&cfg.token), + ) +} + pub struct Flags(Vec); /// Displays a list of flags into the "Exec form" supported by Docker diff --git a/swap-orchestrator/src/images.rs b/swap-orchestrator/src/images.rs index 3553f787ae..d1ca56c7ab 100644 --- a/swap-orchestrator/src/images.rs +++ b/swap-orchestrator/src/images.rs @@ -41,6 +41,12 @@ pub static PROMTAIL_IMAGE: &str = "grafana/promtail@sha256:8b2aa61745bc4a9343cc4 /// docker-socket-proxy 0.3.0 (https://hub.docker.com/r/tecnativa/docker-socket-proxy) pub static DOCKER_SOCKET_PROXY_IMAGE: &str = "tecnativa/docker-socket-proxy@sha256:9e4b9e7517a6b660f2cc903a19b257b1852d5b3344794e3ea334ff00ae677ac2"; +/// cadvisor v0.49.1 (https://github.com/google/cadvisor/pkgs/container/cadvisor) +pub static CADVISOR_IMAGE: &str = "gcr.io/cadvisor/cadvisor@sha256:3cde6faf0791ebf7b41d6f8ae7145466fed712ea6f252c935294d2608b1af388"; + +/// prometheus v3.1.0 (https://hub.docker.com/r/prom/prometheus) +pub static PROMETHEUS_IMAGE: &str = "prom/prometheus@sha256:6559acbd5d770b15bb3c954629ce190ac3cbbdb2b7f1c30f0385c4e05104e218"; + /// Build-context URL for the source-built images. A `gh_token` is inlined into /// the URL userinfo so Docker can fetch a private repository — note this writes /// the token into `docker-compose.yml` in plaintext. 
diff --git a/swap-orchestrator/src/main.rs b/swap-orchestrator/src/main.rs index c8c9ae31e6..7958d0809f 100644 --- a/swap-orchestrator/src/main.rs +++ b/swap-orchestrator/src/main.rs @@ -6,9 +6,10 @@ mod prompt; use swap_orchestrator as _; use crate::compose::{ - ASB_DATA_DIR, CloudflaredConfig, DOCKER_COMPOSE_FILE, IntoSpec, OrchestratorDirectories, - OrchestratorImage, OrchestratorImages, OrchestratorInput, OrchestratorNetworks, - PROMTAIL_CONFIG_FILE, PromtailConfig, build_promtail_yml, + ASB_DATA_DIR, CloudflaredConfig, DOCKER_COMPOSE_FILE, IntoSpec, MetricsConfig, + OrchestratorDirectories, OrchestratorImage, OrchestratorImages, OrchestratorInput, + OrchestratorNetworks, PROMETHEUS_CONFIG_FILE, PROMTAIL_CONFIG_FILE, PromtailConfig, + build_prometheus_agent_yml, build_promtail_yml, }; use libp2p::Multiaddr; use libp2p::multiaddr::Protocol; @@ -141,6 +142,33 @@ fn read_promtail_config_from_env() -> Option<PromtailConfig> { }) } +/// Reads the Prometheus metrics-shipping configuration from the environment. +/// +/// Opt-in via `METRICS_REMOTE_WRITE_URL`. Metrics reuse Promtail's bearer token +/// and instance label (the central gate authorizes both with one token and the +/// `host` label must match), so Promtail must be configured too — enabling +/// metrics without it is a hard error rather than a silent, unauthenticated or +/// unlabelled push. +fn read_metrics_config_from_env(promtail: Option<&PromtailConfig>) -> Option<MetricsConfig> { + let remote_write_url = std::env::var("METRICS_REMOTE_WRITE_URL").ok()?; + + if remote_write_url.trim().is_empty() { + panic!("METRICS_REMOTE_WRITE_URL must not be empty."); + } + + let promtail = promtail.unwrap_or_else(|| { + panic!( + "METRICS_REMOTE_WRITE_URL is set but Promtail is not configured. Metrics reuse the Promtail bearer token and instance label, so set the PROMTAIL_* variables as well." 
+ ) + }); + + Some(MetricsConfig { + remote_write_url, + token: promtail.loki_push_token.clone(), + instance: promtail.instance.clone(), + }) +} + /// `GH_TOKEN` for fetching a private build-context repository; `None` if unset /// or empty. See [`images::source_build_context`]. fn read_gh_token_from_env() -> Option { @@ -159,6 +187,9 @@ fn main() { // Promtail log shipping is opt-in via env vars; same rationale as the // Cloudflare integration above. let promtail_config = read_promtail_config_from_env(); + // Prometheus metrics shipping is opt-in and reuses the Promtail token and + // instance label, so it is read after (and depends on) the Promtail config. + let metrics_config = read_metrics_config_from_env(promtail_config.as_ref()); // Opt-in: inlined into the build-context URL so Docker can fetch a private repo. let gh_token = read_gh_token_from_env(); let source_build_context = images::source_build_context(gh_token.as_deref()); @@ -210,6 +241,8 @@ fn main() { docker_socket_proxy: OrchestratorImage::Registry( images::DOCKER_SOCKET_PROXY_IMAGE.to_string(), ), + cadvisor: OrchestratorImage::Registry(images::CADVISOR_IMAGE.to_string()), + prometheus_agent: OrchestratorImage::Registry(images::PROMETHEUS_IMAGE.to_string()), }, directories: OrchestratorDirectories { asb_data_dir: PathBuf::from(ASB_DATA_DIR), @@ -217,6 +250,7 @@ fn main() { want_tor, cloudflared: cloudflared_config.clone(), promtail: promtail_config.clone(), + metrics: metrics_config.clone(), }; // If the config file already exists and be de-serialized, @@ -393,6 +427,11 @@ fn main() { .expect("Failed to write promtail.yml"); } + if let Some(metrics) = metrics_config.as_ref() { + std::fs::write(PROMETHEUS_CONFIG_FILE, build_prometheus_agent_yml(metrics)) + .expect("Failed to write prometheus.yml"); + } + // Write the compose to ./docker-compose.yml let compose = recipe.to_spec(); std::fs::write(DOCKER_COMPOSE_FILE, compose).expect("Failed to write docker-compose.yml"); @@ -407,6 +446,10 @@ fn main() { 
if let Some(promtail) = promtail_config.as_ref() { print_promtail_instructions(promtail); } + + if let Some(metrics) = metrics_config.as_ref() { + print_metrics_instructions(metrics); + } } /// Reads the ASB config from disk, inserts the WebSocket listen address and @@ -512,6 +555,23 @@ fn print_promtail_instructions(promtail: &PromtailConfig) { ); } +/// Prints the operator-facing summary for the Prometheus metrics agent so they +/// can verify it landed and know which Grafana query selects this host. +fn print_metrics_instructions(metrics: &MetricsConfig) { + println!(); + println!("Prometheus metrics shipping is enabled."); + println!(" - Instance label (host): {}", metrics.instance); + println!(" - Remote write URL: {}", metrics.remote_write_url); + println!(" - Config written to: {}", PROMETHEUS_CONFIG_FILE); + println!(" - Ships: per-container cpu/memory/pids/network/fs via cadvisor"); + println!(" - Verify after `docker compose up -d`:"); + println!(" docker compose logs --tail 50 prometheus-agent"); + println!( + " - Grafana query (cpu per container): rate(container_cpu_usage_seconds_total{{host=\"{}\"}}[5m])", + metrics.instance + ); +} + fn unix_epoch_secs() -> u64 { std::time::UNIX_EPOCH .elapsed() diff --git a/swap-orchestrator/tests/spec.rs b/swap-orchestrator/tests/spec.rs index b20a66bda5..42e84c1174 100644 --- a/swap-orchestrator/tests/spec.rs +++ b/swap-orchestrator/tests/spec.rs @@ -1,9 +1,9 @@ #![allow(unused_crate_dependencies)] use swap_orchestrator::compose::{ - CloudflaredConfig, IntoSpec, OrchestratorDirectories, OrchestratorImage, OrchestratorImages, - OrchestratorInput, OrchestratorNetworks, OrchestratorPorts, PromtailConfig, - build_promtail_yml, + CloudflaredConfig, IntoSpec, MetricsConfig, OrchestratorDirectories, OrchestratorImage, + OrchestratorImages, OrchestratorInput, OrchestratorNetworks, OrchestratorPorts, PromtailConfig, + build_prometheus_agent_yml, build_promtail_yml, }; use swap_orchestrator::images; @@ -11,6 +11,7 @@ fn 
make_input( want_tor: bool, cloudflared: Option<CloudflaredConfig>, promtail: Option<PromtailConfig>, + metrics: Option<MetricsConfig>, ) -> OrchestratorInput { let source_build_context = images::source_build_context(None); OrchestratorInput { @@ -48,6 +49,8 @@ fn make_input( docker_socket_proxy: OrchestratorImage::Registry( images::DOCKER_SOCKET_PROXY_IMAGE.to_string(), ), + cadvisor: OrchestratorImage::Registry(images::CADVISOR_IMAGE.to_string()), + prometheus_agent: OrchestratorImage::Registry(images::PROMETHEUS_IMAGE.to_string()), }, directories: OrchestratorDirectories { asb_data_dir: std::path::PathBuf::from(swap_orchestrator::compose::ASB_DATA_DIR), @@ -55,6 +58,7 @@ fn make_input( want_tor, cloudflared, promtail, + metrics, } } @@ -75,17 +79,25 @@ fn sample_promtail_config() -> PromtailConfig { } } +fn sample_metrics_config() -> MetricsConfig { + MetricsConfig { + remote_write_url: "https://loki-asb-logs.example.com/api/v1/write".to_string(), + token: "test-token".to_string(), + instance: "asb-test-1".to_string(), + } +} + #[test] fn test_orchestrator_spec_generation() { // `to_spec` runs `validate_compose` internally, so generating each // variant is enough to catch indentation regressions in the optional // tor / cloudflared / promtail segments. 
- let _ = make_input(false, None, None).to_spec(); - let _ = make_input(true, None, None).to_spec(); - let _ = make_input(false, Some(sample_cloudflared_config()), None).to_spec(); - let _ = make_input(true, Some(sample_cloudflared_config()), None).to_spec(); - let compose = make_input(false, None, Some(sample_promtail_config())).to_spec(); - let _ = make_input(true, None, Some(sample_promtail_config())).to_spec(); + let _ = make_input(false, None, None, None).to_spec(); + let _ = make_input(true, None, None, None).to_spec(); + let _ = make_input(false, Some(sample_cloudflared_config()), None, None).to_spec(); + let _ = make_input(true, Some(sample_cloudflared_config()), None, None).to_spec(); + let compose = make_input(false, None, Some(sample_promtail_config()), None).to_spec(); + let _ = make_input(true, None, Some(sample_promtail_config()), None).to_spec(); // promtail's docker SD needs the networks API, not just containers, or // discovery 403s on /networks and no node logs ship. @@ -94,8 +106,26 @@ fn test_orchestrator_spec_generation() { true, Some(sample_cloudflared_config()), Some(sample_promtail_config()), + None, + ) + .to_spec(); + + // With metrics enabled, both cadvisor and the prometheus agent must appear. + let metrics_compose = make_input( + true, + Some(sample_cloudflared_config()), + Some(sample_promtail_config()), + Some(sample_metrics_config()), ) .to_spec(); + assert!(metrics_compose.contains("container_name: cadvisor")); + assert!(metrics_compose.contains("container_name: prometheus-agent")); + assert!(metrics_compose.contains("prometheus-agent-data:")); + + // Without metrics, neither service is generated. 
+ let plain = make_input(false, None, None, None).to_spec(); + assert!(!plain.contains("cadvisor")); + assert!(!plain.contains("prometheus-agent")); } #[test] @@ -105,7 +135,7 @@ fn test_gh_token_inlined_into_build_context() { // A spec built from the authenticated context must still validate, and the // token must reach the build attribute of every source-built service. - let mut input = make_input(false, None, None); + let mut input = make_input(false, None, None, None); input.images.asb = OrchestratorImage::Build(images::asb_image_from_source(&context)); input.images.asb_controller = OrchestratorImage::Build(images::asb_controller_image_from_source(&context)); @@ -192,3 +222,30 @@ fn test_promtail_yml_ships_node_container_logs() { .expect("a host relabel must be present"); assert_eq!(host_relabel["replacement"].as_str(), Some("asb-test-1")); } + +#[test] +fn test_prometheus_agent_yml_is_valid_and_wired() { + let yml = build_prometheus_agent_yml(&sample_metrics_config()); + let parsed: serde_yaml::Value = + serde_yaml::from_str(&yml).expect("prometheus.yml must be valid YAML"); + + // The host external label must match the Promtail instance so metrics and + // logs share one selector in Grafana. + assert_eq!( + parsed["global"]["external_labels"]["host"].as_str(), + Some("asb-test-1") + ); + + // The agent scrapes the local cadvisor and pushes to the remote endpoint + // with the shared bearer token. 
+ assert_eq!( + parsed["scrape_configs"][0]["static_configs"][0]["targets"][0].as_str(), + Some("cadvisor:8080") + ); + let remote = &parsed["remote_write"][0]; + assert_eq!( + remote["url"].as_str(), + Some("https://loki-asb-logs.example.com/api/v1/write") + ); + assert_eq!(remote["bearer_token"].as_str(), Some("test-token")); +} From 6fb8bd965f4a21ca0bcf94b5a247c8d8f83d282a Mon Sep 17 00:00:00 2001 From: binarybaron Date: Fri, 5 Jun 2026 13:59:06 +0200 Subject: [PATCH 2/4] fix(orchestrator): bump cadvisor to v0.52.1 + cgroup host v0.49.1's docker client speaks API 1.41, which Docker Engine 29 rejects (min 1.44), so the docker factory fails to register and per-container series lose their name=/image= labels. Bump to v0.52.1 and add cgroup: host so the cadvisor container can see the host's docker-*.scope cgroups under cgroup v2 (without it, scopes enumerate empty). --- swap-orchestrator/src/compose.rs | 3 +++ swap-orchestrator/src/images.rs | 7 +++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/swap-orchestrator/src/compose.rs b/swap-orchestrator/src/compose.rs index de786e68c0..8e7359d04b 100644 --- a/swap-orchestrator/src/compose.rs +++ b/swap-orchestrator/src/compose.rs @@ -406,6 +406,7 @@ fn build(input: OrchestratorInput) -> String { {image_cadvisor} restart: unless-stopped privileged: true + cgroup: host devices: - /dev/kmsg:/dev/kmsg volumes: @@ -534,6 +535,8 @@ services: restart: unless-stopped cap_add: - SYS_PTRACE + sysctls: + - net.ipv4.tcp_tw_reuse=1 depends_on: - electrs volumes: diff --git a/swap-orchestrator/src/images.rs b/swap-orchestrator/src/images.rs index d1ca56c7ab..3bf54ecf01 100644 --- a/swap-orchestrator/src/images.rs +++ b/swap-orchestrator/src/images.rs @@ -41,8 +41,11 @@ pub static PROMTAIL_IMAGE: &str = "grafana/promtail@sha256:8b2aa61745bc4a9343cc4 /// docker-socket-proxy 0.3.0 (https://hub.docker.com/r/tecnativa/docker-socket-proxy) pub static DOCKER_SOCKET_PROXY_IMAGE: &str = 
"tecnativa/docker-socket-proxy@sha256:9e4b9e7517a6b660f2cc903a19b257b1852d5b3344794e3ea334ff00ae677ac2"; -/// cadvisor v0.49.1 (https://github.com/google/cadvisor/pkgs/container/cadvisor) -pub static CADVISOR_IMAGE: &str = "gcr.io/cadvisor/cadvisor@sha256:3cde6faf0791ebf7b41d6f8ae7145466fed712ea6f252c935294d2608b1af388"; +/// cadvisor v0.52.1 (https://github.com/google/cadvisor/pkgs/container/cadvisor) +/// Older releases (e.g. v0.49.1) ship a Docker client speaking API 1.41, which +/// Docker Engine 29 rejects (min 1.44); the docker factory then fails to +/// register and per-container series lose their name=/image= labels. +pub static CADVISOR_IMAGE: &str = "gcr.io/cadvisor/cadvisor@sha256:f40e65878e25c2e78ea037f73a449527a0fb994e303dc3e34cb6b187b4b91435"; /// prometheus v3.1.0 (https://hub.docker.com/r/prom/prometheus) pub static PROMETHEUS_IMAGE: &str = "prom/prometheus@sha256:6559acbd5d770b15bb3c954629ce190ac3cbbdb2b7f1c30f0385c4e05104e218"; From fc6b7e4f4d3a15abcfb0dc7ce9a83f79900d0ac0 Mon Sep 17 00:00:00 2001 From: binarybaron Date: Fri, 5 Jun 2026 14:04:33 +0200 Subject: [PATCH 3/4] chore(orchestrator): trim comments --- swap-orchestrator/src/compose.rs | 27 --------------------------- swap-orchestrator/src/images.rs | 3 --- swap-orchestrator/src/main.rs | 11 ----------- 3 files changed, 41 deletions(-) diff --git a/swap-orchestrator/src/compose.rs b/swap-orchestrator/src/compose.rs index 8e7359d04b..8734ef1669 100644 --- a/swap-orchestrator/src/compose.rs +++ b/swap-orchestrator/src/compose.rs @@ -67,26 +67,10 @@ pub struct PromtailConfig { pub instance: String, } -/// Prometheus metrics-shipping configuration. -/// -/// When set, the orchestrator adds `cadvisor` and `prometheus-agent` services -/// to the compose file and writes a `prometheus.yml` next to -/// `docker-compose.yml`. cadvisor exposes per-container resource metrics; the -/// Prometheus agent scrapes it locally and `remote_write`s to a central -/// endpoint. 
The bearer token and host label are reused from [`PromtailConfig`] -/// so metrics and logs authenticate identically and share the same `host` -/// selector in Grafana. #[derive(Clone)] pub struct MetricsConfig { - /// Prometheus `remote_write` endpoint, e.g. - /// `https://asb-logs.example.com/api/v1/write`. pub remote_write_url: String, - /// Bearer token presented to the endpoint. The same token Promtail uses for - /// the Loki push — the central gate authorizes both with one token. pub token: String, - /// Short host identifier, exported as the `host` external label so a - /// deployment's metrics and logs select with the same query. Reused from - /// the Promtail instance. pub instance: String, } @@ -394,11 +378,6 @@ fn build(input: OrchestratorInput) -> String { }; let (metrics_segment, metrics_volume) = if input.metrics.is_some() { - // cadvisor reads cgroups/host paths read-only to expose per-container - // CPU/memory/PID/network/fs metrics. prometheus-agent runs in agent - // mode (no local query/storage beyond the WAL) and only scrapes - // cadvisor, then remote_writes to the central endpoint. The endpoint - // URL, bearer token and host label are baked into prometheus.yml. let metrics_segment = format!( "\ cadvisor: @@ -702,12 +681,6 @@ scrape_configs: ) } -/// Builds the YAML body of `prometheus.yml` for the host's Prometheus agent. -/// -/// The agent scrapes the local cadvisor and `remote_write`s to the central -/// endpoint. Values from [`MetricsConfig`] are baked in directly. The `host` -/// external label matches the Promtail `host` label so a deployment's metrics -/// and logs select identically in Grafana. 
pub fn build_prometheus_agent_yml(cfg: &MetricsConfig) -> String { fn yaml_single_quote(value: &str) -> String { format!("'{}'", value.replace('\'', "''")) diff --git a/swap-orchestrator/src/images.rs b/swap-orchestrator/src/images.rs index 3bf54ecf01..81719af295 100644 --- a/swap-orchestrator/src/images.rs +++ b/swap-orchestrator/src/images.rs @@ -42,9 +42,6 @@ pub static PROMTAIL_IMAGE: &str = "grafana/promtail@sha256:8b2aa61745bc4a9343cc4 pub static DOCKER_SOCKET_PROXY_IMAGE: &str = "tecnativa/docker-socket-proxy@sha256:9e4b9e7517a6b660f2cc903a19b257b1852d5b3344794e3ea334ff00ae677ac2"; /// cadvisor v0.52.1 (https://github.com/google/cadvisor/pkgs/container/cadvisor) -/// Older releases (e.g. v0.49.1) ship a Docker client speaking API 1.41, which -/// Docker Engine 29 rejects (min 1.44); the docker factory then fails to -/// register and per-container series lose their name=/image= labels. pub static CADVISOR_IMAGE: &str = "gcr.io/cadvisor/cadvisor@sha256:f40e65878e25c2e78ea037f73a449527a0fb994e303dc3e34cb6b187b4b91435"; /// prometheus v3.1.0 (https://hub.docker.com/r/prom/prometheus) diff --git a/swap-orchestrator/src/main.rs b/swap-orchestrator/src/main.rs index 7958d0809f..d8de36ed11 100644 --- a/swap-orchestrator/src/main.rs +++ b/swap-orchestrator/src/main.rs @@ -142,13 +142,6 @@ fn read_promtail_config_from_env() -> Option { }) } -/// Reads the Prometheus metrics-shipping configuration from the environment. -/// -/// Opt-in via `METRICS_REMOTE_WRITE_URL`. Metrics reuse Promtail's bearer token -/// and instance label (the central gate authorizes both with one token and the -/// `host` label must match), so Promtail must be configured too — enabling -/// metrics without it is a hard error rather than a silent, unauthenticated or -/// unlabelled push. 
fn read_metrics_config_from_env(promtail: Option<&PromtailConfig>) -> Option<MetricsConfig> { let remote_write_url = std::env::var("METRICS_REMOTE_WRITE_URL").ok()?; @@ -187,8 +180,6 @@ fn main() { // Promtail log shipping is opt-in via env vars; same rationale as the // Cloudflare integration above. let promtail_config = read_promtail_config_from_env(); - // Prometheus metrics shipping is opt-in and reuses the Promtail token and - // instance label, so it is read after (and depends on) the Promtail config. let metrics_config = read_metrics_config_from_env(promtail_config.as_ref()); // Opt-in: inlined into the build-context URL so Docker can fetch a private repo. let gh_token = read_gh_token_from_env(); @@ -555,8 +546,6 @@ fn print_promtail_instructions(promtail: &PromtailConfig) { ); } -/// Prints the operator-facing summary for the Prometheus metrics agent so they -/// can verify it landed and know which Grafana query selects this host. fn print_metrics_instructions(metrics: &MetricsConfig) { println!(); println!("Prometheus metrics shipping is enabled."); From f57827593dbe1561d39f259c7e97cf43985c75dc Mon Sep 17 00:00:00 2001 From: binarybaron Date: Fri, 5 Jun 2026 14:09:22 +0200 Subject: [PATCH 4/4] chore(orchestrator): drop grafana query hint from metrics output --- swap-orchestrator/src/main.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/swap-orchestrator/src/main.rs b/swap-orchestrator/src/main.rs index d8de36ed11..d8ead92696 100644 --- a/swap-orchestrator/src/main.rs +++ b/swap-orchestrator/src/main.rs @@ -555,10 +555,6 @@ fn print_metrics_instructions(metrics: &MetricsConfig) { println!(" - Ships: per-container cpu/memory/pids/network/fs via cadvisor"); println!(" - Verify after `docker compose up -d`:"); println!(" docker compose logs --tail 50 prometheus-agent"); - println!( - " - Grafana query (cpu per container): rate(container_cpu_usage_seconds_total{{host=\"{}\"}}[5m])", - metrics.instance - ); } fn unix_epoch_secs() -> u64 {